From 9108bffc9a021aa1f5137381c8f3aec47e71e319 Mon Sep 17 00:00:00 2001
From: hulk
Date: Wed, 10 Jul 2024 05:43:22 +0800
Subject: [PATCH 01/57] Add support for the table function WITH ORDINALITY
 modifier in the Postgres parser (#1337)

---
 src/ast/query.rs              | 14 ++++++++++++
 src/keywords.rs               |  1 +
 src/parser/mod.rs             |  5 ++++
 src/test_utils.rs             |  2 ++
 tests/sqlparser_bigquery.rs   |  8 ++++++-
 tests/sqlparser_clickhouse.rs |  2 ++
 tests/sqlparser_common.rs     | 43 +++++++++++++++++++++++++++++++++++
 tests/sqlparser_databricks.rs |  3 ++-
 tests/sqlparser_duckdb.rs     |  2 ++
 tests/sqlparser_hive.rs       |  1 +
 tests/sqlparser_mssql.rs      |  3 +++
 tests/sqlparser_mysql.rs      |  5 ++++
 tests/sqlparser_postgres.rs   | 37 +++++++++++++++++++++++++++++-
 tests/sqlparser_redshift.rs   |  3 +++
 tests/sqlparser_snowflake.rs  |  1 +
 tests/sqlparser_sqlite.rs     |  3 ++-
 16 files changed, 129 insertions(+), 4 deletions(-)

diff --git a/src/ast/query.rs b/src/ast/query.rs
index 70c781409..608ac2e96 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -913,6 +913,10 @@ pub enum TableFactor {
         /// Optional version qualifier to facilitate table time-travel, as
         /// supported by BigQuery and MSSQL.
         version: Option<TableVersion>,
+        /// Optional table function modifier that adds an ordinality (row number) column.
+        /// For example, `SELECT * FROM generate_series(1, 10) WITH ORDINALITY AS t(a, b);`
+        /// [WITH ORDINALITY](https://www.postgresql.org/docs/current/functions-srf.html), supported by Postgres.
+        with_ordinality: bool,
         /// [Partition selection](https://dev.mysql.com/doc/refman/8.0/en/partitioning-selection.html), supported by MySQL.
         partitions: Vec<Ident>,
     },
@@ -948,6 +952,7 @@ pub enum TableFactor {
         array_exprs: Vec<Expr>,
         with_offset: bool,
         with_offset_alias: Option<Ident>,
+        with_ordinality: bool,
     },
     /// The `JSON_TABLE` table-valued function.
     /// Part of the SQL standard, but implemented only by MySQL, Oracle, and DB2.
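
A note on the semantics being modeled here: `WITH ORDINALITY` makes a
set-returning function emit one extra BIGINT column numbering its rows
from 1. A minimal illustration, run against a stock PostgreSQL server
rather than anything in this patch:

    SELECT * FROM generate_series(10, 12) WITH ORDINALITY AS t(v, n);
    --  v | n
    -- 10 | 1
    -- 11 | 2
    -- 12 | 3
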
@@ -1293,6 +1298,7 @@ impl fmt::Display for TableFactor { with_hints, version, partitions, + with_ordinality, } => { write!(f, "{name}")?; if !partitions.is_empty() { @@ -1301,6 +1307,9 @@ impl fmt::Display for TableFactor { if let Some(args) = args { write!(f, "({})", display_comma_separated(args))?; } + if *with_ordinality { + write!(f, " WITH ORDINALITY")?; + } if let Some(alias) = alias { write!(f, " AS {alias}")?; } @@ -1354,9 +1363,14 @@ impl fmt::Display for TableFactor { array_exprs, with_offset, with_offset_alias, + with_ordinality, } => { write!(f, "UNNEST({})", display_comma_separated(array_exprs))?; + if *with_ordinality { + write!(f, " WITH ORDINALITY")?; + } + if let Some(alias) = alias { write!(f, " AS {alias}")?; } diff --git a/src/keywords.rs b/src/keywords.rs index 7146c4efe..a53eaccba 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -518,6 +518,7 @@ define_keywords!( OR, ORC, ORDER, + ORDINALITY, OUT, OUTER, OUTPUTFORMAT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 87166f503..e89eba9b1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9209,6 +9209,7 @@ impl<'a> Parser<'a> { let array_exprs = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) { Ok(Some(alias)) => Some(alias), Ok(None) => None, @@ -9235,6 +9236,7 @@ impl<'a> Parser<'a> { array_exprs, with_offset, with_offset_alias, + with_ordinality, }) } else if self.parse_keyword_with_tokens(Keyword::JSON_TABLE, &[Token::LParen]) { let json_expr = self.parse_expr()?; @@ -9273,6 +9275,8 @@ impl<'a> Parser<'a> { None }; + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; // MSSQL-specific table hints: @@ -9294,6 +9298,7 @@ impl<'a> Parser<'a> { with_hints, version, partitions, + with_ordinality, }; while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { diff --git a/src/test_utils.rs b/src/test_utils.rs index 1a31d4611..1f5300be1 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -309,6 +309,7 @@ pub fn table(name: impl Into) -> TableFactor { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, } } @@ -323,6 +324,7 @@ pub fn table_with_alias(name: impl Into, alias: impl Into) -> Ta with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 88e2ef912..089a41889 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -224,6 +224,7 @@ fn parse_delete_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -1353,6 +1354,7 @@ fn parse_table_identifiers() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] },] @@ -1525,6 +1527,7 @@ fn parse_table_time_travel() { Value::SingleQuotedString(version) ))), partitions: vec![], + with_ordinality: false, }, joins: vec![] },] @@ -1551,7 +1554,8 @@ fn parse_join_constraint_unnest_alias() { Ident::new("a") ])], with_offset: false, - with_offset_alias: None + with_offset_alias: None, + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -1620,6 +1624,7 @@ fn 
parse_merge() { with_hints: Default::default(), version: Default::default(), partitions: Default::default(), + with_ordinality: false, }, table ); @@ -1634,6 +1639,7 @@ fn parse_merge() { with_hints: Default::default(), version: Default::default(), partitions: Default::default(), + with_ordinality: false, }, source ); diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index f6b787f5c..99db3d10c 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -59,6 +59,7 @@ fn parse_map_access_expr() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -162,6 +163,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 86357234c..1adda149e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -359,6 +359,7 @@ fn parse_update_set_from() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -387,6 +388,7 @@ fn parse_update_set_from() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -463,6 +465,7 @@ fn parse_update_with_table_alias() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -530,6 +533,7 @@ fn parse_select_with_table_alias() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }] @@ -566,6 +570,7 @@ fn parse_delete_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -612,6 +617,7 @@ fn parse_delete_statement_for_multi_tables() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -623,6 +629,7 @@ fn parse_delete_statement_for_multi_tables() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].joins[0].relation ); @@ -648,6 +655,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -659,6 +667,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[1].relation ); @@ -670,6 +679,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, using[0].relation ); @@ -681,6 +691,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, using[0].joins[0].relation ); @@ -711,6 +722,7 @@ fn parse_where_delete_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation, ); @@ -755,6 +767,7 @@ fn parse_where_delete_with_alias_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation, ); @@ -770,6 +783,7 @@ fn parse_where_delete_with_alias_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }]), @@ -4551,6 +4565,7 @@ fn test_parse_named_window() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -4933,6 +4948,7 @@ fn 
parse_interval_and_or_xor() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -5286,6 +5302,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: true, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5303,6 +5320,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5320,6 +5338,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: true, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5340,6 +5359,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5364,6 +5384,7 @@ fn parse_unnest_in_from_clause() { )], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5394,6 +5415,7 @@ fn parse_unnest_in_from_clause() { ], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5503,6 +5525,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -5514,6 +5537,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -5533,6 +5557,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -5542,6 +5567,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -5554,6 +5580,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -5563,6 +5590,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -5585,6 +5613,7 @@ fn parse_cross_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::CrossJoin, }, @@ -5607,6 +5636,7 @@ fn parse_joins_on() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: f(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -5678,6 +5708,7 @@ fn parse_joins_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } @@ -5741,6 +5772,7 @@ fn parse_natural_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: f(JoinConstraint::Natural), } @@ -6008,6 +6040,7 @@ fn parse_derived_tables() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -6905,6 +6938,7 @@ fn lateral_function() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Function { @@ -7613,6 +7647,7 @@ fn parse_merge() { with_hints: vec![], version: 
None, partitions: vec![], + with_ordinality: false, } ); assert_eq!(table, table_no_into); @@ -7638,6 +7673,7 @@ fn parse_merge() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -8700,6 +8736,7 @@ fn parse_pivot_table() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }), aggregate_functions: vec![ expected_function("a", None), @@ -8769,6 +8806,7 @@ fn parse_unpivot_table() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }), value: Ident { value: "quantity".to_string(), @@ -8835,6 +8873,7 @@ fn parse_pivot_unpivot_table() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }), value: Ident { value: "population".to_string(), @@ -9159,6 +9198,7 @@ fn parse_unload() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -9304,6 +9344,7 @@ fn parse_connect_by() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -9389,6 +9430,7 @@ fn parse_connect_by() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -9548,6 +9590,7 @@ fn test_match_recognize() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }; fn check(options: &str, expect: TableFactor) { diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 90056f0f7..280b97b49 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -177,7 +177,8 @@ fn test_values_clause() { args: None, with_hints: vec![], version: None, - partitions: vec![] + partitions: vec![], + with_ordinality: false, }), query .body diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 400daa8a8..0e61b86c9 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -166,6 +166,7 @@ fn test_select_union_by_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -205,6 +206,7 @@ fn test_select_union_by_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 53280d7d8..5f0b9f575 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -359,6 +359,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 6968347ec..26bece81d 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -64,6 +64,7 @@ fn parse_table_time_travel() { Value::SingleQuotedString(version) ))), partitions: vec![], + with_ordinality: false, }, joins: vec![] },] @@ -335,6 +336,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); @@ -526,6 +528,7 @@ fn parse_substring_in_select() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 74def31bf..ec094bcd6 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1728,6 +1728,7 @@ fn parse_select_with_numeric_prefix_column_name() { with_hints: vec![], version: None, partitions: vec![], + 
with_ordinality: false, }, joins: vec![] }], @@ -1782,6 +1783,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], @@ -1847,6 +1849,7 @@ fn parse_update_with_joins() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -1859,6 +1862,7 @@ fn parse_update_with_joins() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ @@ -2282,6 +2286,7 @@ fn parse_substring_in_select() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 9af4f4d6c..164bb72c7 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3501,6 +3501,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); @@ -4054,7 +4055,8 @@ fn parse_join_constraint_unnest_alias() { Ident::new("a") ])], with_offset: false, - with_offset_alias: None + with_offset_alias: None, + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -4362,3 +4364,36 @@ fn parse_create_table_with_options() { _ => unreachable!(), } } + +#[test] +fn test_table_function_with_ordinality() { + let from = pg_and_generic() + .verified_only_select("SELECT * FROM generate_series(1, 10) WITH ORDINALITY AS t") + .from; + assert_eq!(1, from.len()); + match from[0].relation { + TableFactor::Table { + ref name, + with_ordinality: true, + .. + } => { + assert_eq!("generate_series", name.to_string().as_str()); + } + _ => panic!("Expecting TableFactor::Table with ordinality"), + } +} + +#[test] +fn test_table_unnest_with_ordinality() { + let from = pg_and_generic() + .verified_only_select("SELECT * FROM UNNEST([10, 20, 30]) WITH ORDINALITY AS t") + .from; + assert_eq!(1, from.len()); + match from[0].relation { + TableFactor::UNNEST { + with_ordinality: true, + .. 
+ } => {} + _ => panic!("Expecting TableFactor::UNNEST with ordinality"), + } +} diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 938e6e887..440116e02 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -48,6 +48,7 @@ fn test_square_brackets_over_db_schema_table_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], } @@ -94,6 +95,7 @@ fn test_double_quotes_over_db_schema_table_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], } @@ -114,6 +116,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 2f4ed1316..7a2288cbb 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -870,6 +870,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index dd1e77d5d..629ab5fc2 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -399,7 +399,8 @@ fn parse_update_tuple_row_values() { args: None, with_hints: vec![], version: None, - partitions: vec![] + partitions: vec![], + with_ordinality: false, }, joins: vec![], }, From 993216f3ac279e1e86a16de8696e60dc78d5a418 Mon Sep 17 00:00:00 2001 From: hulk Date: Sat, 13 Jul 2024 17:46:26 +0800 Subject: [PATCH 02/57] Enable PARTITION BY feature for PostgreSQL while parsing the create table statement (#1338) --- src/ast/helpers/stmt_create_table.rs | 4 +- src/parser/mod.rs | 59 +++++++++++++++------------- tests/sqlparser_postgres.rs | 44 +++++++++++++++++++++ 3 files changed, 77 insertions(+), 30 deletions(-) diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index d862a36ae..92c75e6a4 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -496,9 +496,9 @@ impl TryFrom for CreateTableBuilder { } } -/// Helper return type when parsing configuration for a BigQuery `CREATE TABLE` statement. +/// Helper return type when parsing configuration for a `CREATE TABLE` statement. #[derive(Default)] -pub(crate) struct BigQueryTableConfiguration { +pub(crate) struct CreateTableConfiguration { pub partition_by: Option>, pub cluster_by: Option>>, pub options: Option>, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e89eba9b1..4d2319a08 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -31,7 +31,7 @@ use recursion::RecursionCounter; use IsLateral::*; use IsOptional::*; -use crate::ast::helpers::stmt_create_table::{BigQueryTableConfiguration, CreateTableBuilder}; +use crate::ast::helpers::stmt_create_table::{CreateTableBuilder, CreateTableConfiguration}; use crate::ast::*; use crate::dialect::*; use crate::keywords::{Keyword, ALL_KEYWORDS}; @@ -5416,11 +5416,7 @@ impl<'a> Parser<'a> { None }; - let big_query_config = if dialect_of!(self is BigQueryDialect | GenericDialect) { - self.parse_optional_big_query_create_table_config()? 
- } else { - Default::default() - }; + let create_table_config = self.parse_optional_create_table_config()?; // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { @@ -5505,39 +5501,46 @@ impl<'a> Parser<'a> { .collation(collation) .on_commit(on_commit) .on_cluster(on_cluster) - .partition_by(big_query_config.partition_by) - .cluster_by(big_query_config.cluster_by) - .options(big_query_config.options) + .partition_by(create_table_config.partition_by) + .cluster_by(create_table_config.cluster_by) + .options(create_table_config.options) .primary_key(primary_key) .strict(strict) .build()) } - /// Parse configuration like partitioning, clustering information during big-query table creation. - /// - fn parse_optional_big_query_create_table_config( + /// Parse configuration like partitioning, clustering information during the table creation. + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2) + /// [PostgreSQL](https://www.postgresql.org/docs/current/ddl-partitioning.html) + fn parse_optional_create_table_config( &mut self, - ) -> Result { - let mut partition_by = None; - if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { - partition_by = Some(Box::new(self.parse_expr()?)); + ) -> Result { + let partition_by = if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) + && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) + { + Some(Box::new(self.parse_expr()?)) + } else { + None }; let mut cluster_by = None; - if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { - cluster_by = Some(WrappedCollection::NoWrapping( - self.parse_comma_separated(|p| p.parse_identifier(false))?, - )); - }; - let mut options = None; - if let Token::Word(word) = self.peek_token().token { - if word.keyword == Keyword::OPTIONS { - options = Some(self.parse_options(Keyword::OPTIONS)?); - } - }; + if dialect_of!(self is BigQueryDialect | GenericDialect) { + if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + cluster_by = Some(WrappedCollection::NoWrapping( + self.parse_comma_separated(|p| p.parse_identifier(false))?, + )); + }; + + if let Token::Word(word) = self.peek_token().token { + if word.keyword == Keyword::OPTIONS { + options = Some(self.parse_options(Keyword::OPTIONS)?); + } + }; + } - Ok(BigQueryTableConfiguration { + Ok(CreateTableConfiguration { partition_by, cluster_by, options, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 164bb72c7..ed17e9d8f 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4039,6 +4039,50 @@ fn parse_create_table_with_alias() { } } +#[test] +fn parse_create_table_with_partition_by() { + let sql = "CREATE TABLE t1 (a INT, b TEXT) PARTITION BY RANGE(a)"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("t1", create_table.name.to_string()); + assert_eq!( + vec![ + ColumnDef { + name: "a".into(), + data_type: DataType::Int(None), + collation: None, + options: vec![] + }, + ColumnDef { + name: "b".into(), + data_type: DataType::Text, + collation: None, + options: vec![] + } + ], + create_table.columns + ); + match *create_table.partition_by.unwrap() { + Expr::Function(f) => { + assert_eq!("RANGE", f.name.to_string()); + assert_eq!( + FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + clauses: vec![], + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("a")) + ))], + }), + f.args + 
); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} + #[test] fn parse_join_constraint_unnest_alias() { assert_eq!( From 20f7ac59e38d52e293476b7ad844e7f744a16c43 Mon Sep 17 00:00:00 2001 From: hulk Date: Tue, 16 Jul 2024 01:54:44 +0800 Subject: [PATCH 03/57] Fix AS query clause should be after the create table options (#1339) --- src/ast/dml.rs | 6 +++--- src/parser/mod.rs | 14 +++++++------- tests/sqlparser_clickhouse.rs | 24 ++++++++++++++++++++++++ tests/sqlparser_mysql.rs | 27 +++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 10 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index b35b2b970..0ebbaa3e9 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -418,9 +418,6 @@ impl Display for CreateTable { write!(f, " WITH TAG ({})", display_comma_separated(tag.as_slice()))?; } - if let Some(query) = &self.query { - write!(f, " AS {query}")?; - } if let Some(default_charset) = &self.default_charset { write!(f, " DEFAULT CHARSET={default_charset}")?; } @@ -440,6 +437,9 @@ impl Display for CreateTable { if self.strict { write!(f, " STRICT")?; } + if let Some(query) = &self.query { + write!(f, " AS {query}")?; + } Ok(()) } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4d2319a08..d00f28a55 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5418,13 +5418,6 @@ impl<'a> Parser<'a> { let create_table_config = self.parse_optional_create_table_config()?; - // Parse optional `AS ( query )` - let query = if self.parse_keyword(Keyword::AS) { - Some(self.parse_boxed_query()?) - } else { - None - }; - let default_charset = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET]) { self.expect_token(&Token::Eq)?; let next_token = self.next_token(); @@ -5477,6 +5470,13 @@ impl<'a> Parser<'a> { None }; + // Parse optional `AS ( query )` + let query = if self.parse_keyword(Keyword::AS) { + Some(self.parse_boxed_query()?) + } else { + None + }; + Ok(CreateTableBuilder::new(table_name) .temporary(temporary) .columns(columns) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 99db3d10c..752940551 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -802,6 +802,30 @@ fn test_query_with_format_clause() { } } +#[test] +fn parse_create_table_on_commit_and_as_query() { + let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#; + match clickhouse_and_generic().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + on_commit, + query, + .. + }) => { + assert_eq!(name.to_string(), "test"); + assert_eq!(on_commit, Some(OnCommit::PreserveRows)); + assert_eq!( + query.unwrap().body.as_select().unwrap().projection, + vec![UnnamedExpr(Expr::Value(Value::Number( + "1".parse().unwrap(), + false + )))] + ); + } + _ => unreachable!(), + } +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index ec094bcd6..c2ce407a7 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -812,6 +812,33 @@ fn parse_create_table_collate() { } } +#[test] +fn parse_create_table_both_options_and_as_query() { + let sql = "CREATE TABLE foo (id INT(11)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb4_0900_ai_ci AS SELECT 1"; + match mysql_and_generic().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + collation, + query, + .. 
+        }) => {
+            assert_eq!(name.to_string(), "foo");
+            assert_eq!(collation, Some("utf8mb4_0900_ai_ci".to_string()));
+            assert_eq!(
+                query.unwrap().body.as_select().unwrap().projection,
+                vec![SelectItem::UnnamedExpr(Expr::Value(number("1")))]
+            );
+        }
+        _ => unreachable!(),
+    }
+
+    let sql = r"CREATE TABLE foo (id INT(11)) ENGINE=InnoDB AS SELECT 1 DEFAULT CHARSET=utf8mb3";
+    assert!(matches!(
+        mysql_and_generic().parse_sql_statements(sql),
+        Err(ParserError::ParserError(_))
+    ));
+}
+
 #[test]
 fn parse_create_table_comment_character_set() {
     let sql = "CREATE TABLE foo (s TEXT CHARACTER SET utf8mb4 COMMENT 'comment')";

From 845a1aaddd371a586c41ab9b68ad21a4bbc3884f Mon Sep 17 00:00:00 2001
From: Nick Presta
Date: Sat, 20 Jul 2024 06:51:12 -0400
Subject: [PATCH 04/57] [ClickHouse] Add support for WITH FILL to OrderByExpr
 (#1330)

Co-authored-by: Andrew Lamb
---
 src/ast/mod.rs                |  17 ++--
 src/ast/query.rs              |  91 +++++++++++++++++-
 src/keywords.rs               |   3 +
 src/parser/mod.rs             |  84 ++++++++++++++++-
 tests/sqlparser_clickhouse.rs | 169 ++++++++++++++++++++++++++++++++++
 tests/sqlparser_common.rs     |  35 +++++--
 tests/sqlparser_mssql.rs      |   4 +-
 tests/sqlparser_mysql.rs      |  31 ++++---
 tests/sqlparser_postgres.rs   |  10 +-
 9 files changed, 397 insertions(+), 47 deletions(-)

diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index b8d72e233..2a519fc7c 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -43,14 +43,15 @@ pub use self::operator::{BinaryOperator, UnaryOperator};
 pub use self::query::{
     AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
     ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml,
-    FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Join,
-    JoinConstraint, JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView,
-    LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure,
-    NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OrderByExpr,
-    PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement,
-    ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator,
-    SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion,
-    TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With,
+    FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Interpolate,
+    InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonTableColumn,
+    JsonTableColumnErrorHandling, LateralView, LockClause, LockType, MatchRecognizePattern,
+    MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset,
+    OffsetRows, OrderBy, OrderByExpr, PivotValueSource, Query, RenameSelectItem,
+    RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select,
+    SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table,
+    TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode,
+    Values, WildcardAdditionalOptions, With, WithFill,
 };
 pub use self::value::{
     escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
diff --git a/src/ast/query.rs b/src/ast/query.rs
index 608ac2e96..978604266 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -33,7 +33,7 @@ pub struct Query {
     /// SELECT or UNION / EXCEPT / INTERSECT
     pub body: Box<SetExpr>,
     /// ORDER BY
-    pub order_by: Vec<OrderByExpr>,
+    pub order_by: Option<OrderBy>,
     /// `LIMIT { <N> | ALL }`
     pub limit: Option<Expr>,
@@ -67,8 +67,17 @@ impl fmt::Display for Query {
         write!(f, "{with} ")?;
     }
     write!(f, "{}", self.body)?;
-        if !self.order_by.is_empty() {
-            write!(f, " ORDER BY {}", display_comma_separated(&self.order_by))?;
+        if let Some(ref order_by) = self.order_by {
+            write!(f, " ORDER BY")?;
+            if !order_by.exprs.is_empty() {
+                write!(f, " {}", display_comma_separated(&order_by.exprs))?;
+            }
+            if let Some(ref interpolate) = order_by.interpolate {
+                match &interpolate.exprs {
+                    Some(exprs) => write!(f, " INTERPOLATE ({})", display_comma_separated(exprs))?,
+                    None => write!(f, " INTERPOLATE")?,
+                }
+            }
         }
         if let Some(ref limit) = self.limit {
             write!(f, " LIMIT {limit}")?;
@@ -1668,6 +1677,18 @@ pub enum JoinConstraint {
     None,
 }
 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub struct OrderBy {
+    pub exprs: Vec<OrderByExpr>,
+    /// Optional: `INTERPOLATE`
+    /// Supported by [ClickHouse syntax]
+    ///
+    /// [ClickHouse syntax]: <https://clickhouse.com/docs/en/sql-reference/statements/select/order-by#order-by-expr-with-fill>
+    pub interpolate: Option<Interpolate>,
+}
+
 /// An `ORDER BY` expression
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
@@ -1678,6 +1699,9 @@ pub struct OrderByExpr {
     pub asc: Option<bool>,
     /// Optional `NULLS FIRST` or `NULLS LAST`
     pub nulls_first: Option<bool>,
+    /// Optional: `WITH FILL`
+    /// Supported by [ClickHouse syntax]: <https://clickhouse.com/docs/en/sql-reference/statements/select/order-by#order-by-expr-with-fill>
+    pub with_fill: Option<WithFill>,
 }
 
 impl fmt::Display for OrderByExpr {
@@ -1693,6 +1717,67 @@ impl fmt::Display for OrderByExpr {
             Some(false) => write!(f, " NULLS LAST")?,
             None => (),
         }
+        if let Some(ref with_fill) = self.with_fill {
+            write!(f, " {}", with_fill)?
+        }
         Ok(())
     }
 }
+
+/// ClickHouse `WITH FILL` modifier for `ORDER BY` clause.
+/// Supported by [ClickHouse syntax]
+///
+/// [ClickHouse syntax]: <https://clickhouse.com/docs/en/sql-reference/statements/select/order-by#order-by-expr-with-fill>
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub struct WithFill {
+    pub from: Option<Expr>,
+    pub to: Option<Expr>,
+    pub step: Option<Expr>,
+}
+
+impl fmt::Display for WithFill {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "WITH FILL")?;
+        if let Some(ref from) = self.from {
+            write!(f, " FROM {}", from)?;
+        }
+        if let Some(ref to) = self.to {
+            write!(f, " TO {}", to)?;
+        }
+        if let Some(ref step) = self.step {
+            write!(f, " STEP {}", step)?;
+        }
+        Ok(())
+    }
+}
+
+/// ClickHouse `INTERPOLATE` clause for use in an `ORDER BY` clause when using the `WITH FILL` modifier.
+/// Supported by [ClickHouse syntax]
+///
+/// [ClickHouse syntax]: <https://clickhouse.com/docs/en/sql-reference/statements/select/order-by#order-by-expr-with-fill>
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub struct InterpolateExpr {
+    pub column: Ident,
+    pub expr: Option<Expr>,
+}
+
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub struct Interpolate {
+    pub exprs: Option<Vec<InterpolateExpr>>,
+}
+
+impl fmt::Display for InterpolateExpr {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.column)?;
+        if let Some(ref expr) = self.expr {
+            write!(f, " AS {}", expr)?;
+        }
         Ok(())
     }
 }
diff --git a/src/keywords.rs b/src/keywords.rs
index a53eaccba..2b6900fba 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -297,6 +297,7 @@ define_keywords!(
     FILE,
     FILES,
     FILE_FORMAT,
+    FILL,
     FILTER,
     FIRST,
     FIRST_VALUE,
@@ -382,6 +383,7 @@ define_keywords!(
     INT64,
     INT8,
     INTEGER,
+    INTERPOLATE,
     INTERSECT,
     INTERSECTION,
     INTERVAL,
@@ -682,6 +684,7 @@ define_keywords!(
     STDDEV_SAMP,
     STDIN,
     STDOUT,
+    STEP,
     STORAGE_INTEGRATION,
     STORED,
     STRICT,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index d00f28a55..fb15275e9 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -7934,7 +7934,7 @@ impl<'a> Parser<'a> {
                 body: self.parse_insert_setexpr_boxed()?,
                 limit: None,
                 limit_by: vec![],
-                order_by: vec![],
+                order_by: None,
                 offset: None,
                 fetch: None,
                 locks: vec![],
@@ -7948,7 +7948,7 @@ impl<'a> Parser<'a> {
                 body: self.parse_update_setexpr_boxed()?,
                 limit: None,
                 limit_by: vec![],
-                order_by: vec![],
+                order_by: None,
                 offset: None,
                 fetch: None,
                 locks: vec![],
@@ -7960,9 +7960,19 @@ impl<'a> Parser<'a> {
             let body = self.parse_boxed_query_body(0)?;
 
             let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
-                self.parse_comma_separated(Parser::parse_order_by_expr)?
+                let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?;
+                let interpolate = if dialect_of!(self is ClickHouseDialect | GenericDialect) {
+                    self.parse_interpolations()?
+                } else {
+                    None
+                };
+
+                Some(OrderBy {
+                    exprs: order_by_exprs,
+                    interpolate,
+                })
             } else {
-                vec![]
+                None
             };
 
             let mut limit = None;
@@ -9193,7 +9203,7 @@ impl<'a> Parser<'a> {
                 subquery: Box::new(Query {
                     with: None,
                     body: Box::new(values),
-                    order_by: vec![],
+                    order_by: None,
                     limit: None,
                     limit_by: vec![],
                     offset: None,
@@ -10519,13 +10529,77 @@ impl<'a> Parser<'a> {
             None
         };
 
+        let with_fill = if dialect_of!(self is ClickHouseDialect | GenericDialect)
+            && self.parse_keywords(&[Keyword::WITH, Keyword::FILL])
+        {
+            Some(self.parse_with_fill()?)
+        } else {
+            None
+        };
+
         Ok(OrderByExpr {
             expr,
             asc,
             nulls_first,
+            with_fill,
        })
    }

+    // Parse a WITH FILL clause (ClickHouse dialect)
+    // that follows the WITH FILL keywords in an ORDER BY clause
+    pub fn parse_with_fill(&mut self) -> Result<WithFill, ParserError> {
+        let from = if self.parse_keyword(Keyword::FROM) {
+            Some(self.parse_expr()?)
+        } else {
+            None
+        };
+
+        let to = if self.parse_keyword(Keyword::TO) {
+            Some(self.parse_expr()?)
+        } else {
+            None
+        };
+
+        let step = if self.parse_keyword(Keyword::STEP) {
+            Some(self.parse_expr()?)
+        } else {
+            None
+        };
+
+        Ok(WithFill { from, to, step })
+    }
+
+    // Parse a set of comma separated INTERPOLATE expressions (ClickHouse dialect)
+    // that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier
+    pub fn parse_interpolations(&mut self) -> Result<Option<Interpolate>, ParserError> {
+        if !self.parse_keyword(Keyword::INTERPOLATE) {
+            return Ok(None);
+        }
+
+        if self.consume_token(&Token::LParen) {
+            let interpolations = self.parse_comma_separated0(|p| p.parse_interpolation())?;
+            self.expect_token(&Token::RParen)?;
+            // INTERPOLATE () and INTERPOLATE ( ... ) variants
+            return Ok(Some(Interpolate {
+                exprs: Some(interpolations),
+            }));
+        }
+
+        // INTERPOLATE
+        Ok(Some(Interpolate { exprs: None }))
+    }
+
+    // Parse an INTERPOLATE expression (ClickHouse dialect)
+    pub fn parse_interpolation(&mut self) -> Result<InterpolateExpr, ParserError> {
+        let column = self.parse_identifier(false)?;
+        let expr = if self.parse_keyword(Keyword::AS) {
+            Some(self.parse_expr()?)
+        } else {
+            None
+        };
+        Ok(InterpolateExpr { column, expr })
+    }
+
     /// Parse a TOP clause, MSSQL equivalent of LIMIT,
     /// that follows after `SELECT [DISTINCT]`.
     pub fn parse_top(&mut self) -> Result<Top, ParserError> {
diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs
index 752940551..10d7d66ff 100644
--- a/tests/sqlparser_clickhouse.rs
+++ b/tests/sqlparser_clickhouse.rs
@@ -720,6 +720,175 @@ fn parse_group_by_with_modifier() {
     }
 }
 
+#[test]
+fn parse_select_order_by_with_fill_interpolate() {
+    let sql = "SELECT id, fname, lname FROM customer WHERE id < 5 \
+    ORDER BY \
+    fname ASC NULLS FIRST WITH FILL FROM 10 TO 20 STEP 2, \
+    lname DESC NULLS LAST WITH FILL FROM 30 TO 40 STEP 3 \
+    INTERPOLATE (col1 AS col1 + 1) \
+    LIMIT 2";
+    let select = clickhouse().verified_query(sql);
+    assert_eq!(
+        OrderBy {
+            exprs: vec![
+                OrderByExpr {
+                    expr: Expr::Identifier(Ident::new("fname")),
+                    asc: Some(true),
+                    nulls_first: Some(true),
+                    with_fill: Some(WithFill {
+                        from: Some(Expr::Value(number("10"))),
+                        to: Some(Expr::Value(number("20"))),
+                        step: Some(Expr::Value(number("2"))),
+                    }),
+                },
+                OrderByExpr {
+                    expr: Expr::Identifier(Ident::new("lname")),
+                    asc: Some(false),
+                    nulls_first: Some(false),
+                    with_fill: Some(WithFill {
+                        from: Some(Expr::Value(number("30"))),
+                        to: Some(Expr::Value(number("40"))),
+                        step: Some(Expr::Value(number("3"))),
+                    }),
+                },
+            ],
+            interpolate: Some(Interpolate {
+                exprs: Some(vec![InterpolateExpr {
+                    column: Ident::new("col1"),
+                    expr: Some(Expr::BinaryOp {
+                        left: Box::new(Expr::Identifier(Ident::new("col1"))),
+                        op: BinaryOperator::Plus,
+                        right: Box::new(Expr::Value(number("1"))),
+                    }),
+                }])
+            })
+        },
+        select.order_by.expect("ORDER BY expected")
+    );
+    assert_eq!(Some(Expr::Value(number("2"))), select.limit);
+}
+
+#[test]
+fn parse_select_order_by_with_fill_interpolate_multi_interpolates() {
+    let sql = "SELECT id, fname, lname FROM customer ORDER BY fname WITH FILL \
+    INTERPOLATE (col1 AS col1 + 1) INTERPOLATE (col2 AS col2 + 2)";
+    clickhouse_and_generic()
+        .parse_sql_statements(sql)
+        .expect_err("ORDER BY only accepts a single INTERPOLATE clause");
+}
+
+#[test]
+fn parse_select_order_by_with_fill_interpolate_multi_with_fill_interpolates() {
+    let sql = "SELECT id, fname, lname FROM customer \
+    ORDER BY \
+    fname WITH FILL INTERPOLATE (col1 AS col1 + 1), \
+    lname WITH FILL INTERPOLATE (col2 AS col2 + 2)";
+    clickhouse_and_generic()
+        .parse_sql_statements(sql)
+        .expect_err("ORDER BY only accepts a single INTERPOLATE clause");
+}
+
+#[test]
+fn parse_select_order_by_interpolate_not_last() {
+    let sql
= "SELECT id, fname, lname FROM customer \ + ORDER BY \ + fname INTERPOLATE (col2 AS col2 + 2), + lname"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("ORDER BY INTERPOLATE must be in the last position"); +} + +#[test] +fn parse_with_fill() { + let sql = "SELECT fname FROM customer ORDER BY fname \ + WITH FILL FROM 10 TO 20 STEP 2"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(WithFill { + from: Some(Expr::Value(number("10"))), + to: Some(Expr::Value(number("20"))), + step: Some(Expr::Value(number("2"))), + }), + select.order_by.expect("ORDER BY expected").exprs[0].with_fill + ); +} + +#[test] +fn parse_with_fill_missing_single_argument() { + let sql = "SELECT id, fname, lname FROM customer ORDER BY \ + fname WITH FILL FROM TO 20"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("WITH FILL requires expressions for all arguments"); +} + +#[test] +fn parse_with_fill_multiple_incomplete_arguments() { + let sql = "SELECT id, fname, lname FROM customer ORDER BY \ + fname WITH FILL FROM TO 20, lname WITH FILL FROM TO STEP 1"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("WITH FILL requires expressions for all arguments"); +} + +#[test] +fn parse_interpolate_body_with_columns() { + let sql = "SELECT fname FROM customer ORDER BY fname WITH FILL \ + INTERPOLATE (col1 AS col1 + 1, col2 AS col3, col4 AS col4 + 4)"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(Interpolate { + exprs: Some(vec![ + InterpolateExpr { + column: Ident::new("col1"), + expr: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col1"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("1"))), + }), + }, + InterpolateExpr { + column: Ident::new("col2"), + expr: Some(Expr::Identifier(Ident::new("col3"))), + }, + InterpolateExpr { + column: Ident::new("col4"), + expr: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col4"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("4"))), + }), + }, + ]) + }), + select.order_by.expect("ORDER BY expected").interpolate + ); +} + +#[test] +fn parse_interpolate_without_body() { + let sql = "SELECT fname FROM customer ORDER BY fname WITH FILL INTERPOLATE"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(Interpolate { exprs: None }), + select.order_by.expect("ORDER BY expected").interpolate + ); +} + +#[test] +fn parse_interpolate_with_empty_body() { + let sql = "SELECT fname FROM customer ORDER BY fname WITH FILL INTERPOLATE ()"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(Interpolate { + exprs: Some(vec![]) + }), + select.order_by.expect("ORDER BY expected").interpolate + ); +} + #[test] fn test_prewhere() { match clickhouse_and_generic().verified_stmt("SELECT * FROM t PREWHERE x = 1 WHERE y = 2") { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1adda149e..125e5f1f8 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -409,7 +409,7 @@ fn parse_update_set_from() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -2065,19 +2065,22 @@ fn parse_select_order_by() { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: 
Expr::Identifier(Ident::new("id")), asc: None, nulls_first: None, + with_fill: None, }, ], - select.order_by + select.order_by.expect("ORDER BY expected").exprs ); } chk("SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC, id"); @@ -2097,14 +2100,16 @@ fn parse_select_order_by_limit() { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), nulls_first: None, + with_fill: None, }, ], - select.order_by + select.order_by.expect("ORDER BY expected").exprs ); assert_eq!(Some(Expr::Value(number("2"))), select.limit); } @@ -2120,14 +2125,16 @@ fn parse_select_order_by_nulls_order() { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), nulls_first: Some(true), + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), nulls_first: Some(false), + with_fill: None, }, ], - select.order_by + select.order_by.expect("ORDER BY expeccted").exprs ); assert_eq!(Some(Expr::Value(number("2"))), select.limit); } @@ -2219,6 +2226,7 @@ fn parse_select_qualify() { expr: Expr::Identifier(Ident::new("o")), asc: None, nulls_first: None, + with_fill: None, }], window_frame: None, })), @@ -2579,6 +2587,7 @@ fn parse_listagg() { }), asc: None, nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident { @@ -2587,6 +2596,7 @@ fn parse_listagg() { }), asc: None, nulls_first: None, + with_fill: None, }, ] }), @@ -3437,7 +3447,7 @@ fn parse_create_table_as_table() { table_name: Some("old_table".to_string()), schema_name: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -3464,7 +3474,7 @@ fn parse_create_table_as_table() { table_name: Some("old_table".to_string()), schema_name: Some("schema_name".to_string()), }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -4384,6 +4394,7 @@ fn parse_window_functions() { expr: Expr::Identifier(Ident::new("dt")), asc: Some(false), nulls_first: None, + with_fill: None, }], window_frame: None, })), @@ -4593,6 +4604,7 @@ fn test_parse_named_window() { }), asc: None, nulls_first: None, + with_fill: None, }], window_frame: None, }), @@ -5014,7 +5026,7 @@ fn parse_interval_and_or_xor() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -7300,11 +7312,13 @@ fn parse_create_index() { expr: Expr::Identifier(Ident::new("name")), asc: None, nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("age")), asc: Some(false), nulls_first: None, + with_fill: None, }, ]; match verified_stmt(sql) { @@ -7334,11 +7348,13 @@ fn test_create_index_with_using_function() { expr: Expr::Identifier(Ident::new("name")), asc: None, nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("age")), asc: Some(false), nulls_first: None, + with_fill: None, }, ]; match verified_stmt(sql) { @@ -7691,7 +7707,7 @@ fn parse_merge() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -9223,7 +9239,7 @@ fn parse_unload() { fetch: None, locks: vec![], for_clause: None, - order_by: vec![], + order_by: None, settings: None, format_clause: None, }), @@ -9622,6 +9638,7 @@ fn test_match_recognize() { expr: Expr::Identifier(Ident::new("price_date")), asc: None, nulls_first: None, + 
with_fill: None, }], measures: vec![ Measure { diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 26bece81d..3e8b6afbf 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -103,7 +103,7 @@ fn parse_create_procedure() { fetch: None, locks: vec![], for_clause: None, - order_by: vec![], + order_by: None, settings: None, format_clause: None, body: Box::new(SetExpr::Select(Box::new(Select { @@ -546,7 +546,7 @@ fn parse_substring_in_select() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index c2ce407a7..b0b29f347 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -946,7 +946,7 @@ fn parse_escaped_quote_identifiers_with_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -996,7 +996,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1043,7 +1043,7 @@ fn parse_escaped_backticks_with_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1090,7 +1090,7 @@ fn parse_escaped_backticks_with_no_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1296,7 +1296,7 @@ fn parse_simple_insert() { ] ] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1340,7 +1340,7 @@ fn parse_ignore_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1384,7 +1384,7 @@ fn parse_priority_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1425,7 +1425,7 @@ fn parse_priority_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1474,7 +1474,7 @@ fn parse_insert_as() { "2024-01-01".to_string() ))]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1535,7 +1535,7 @@ fn parse_insert_as() { Expr::Value(Value::SingleQuotedString("2024-01-01".to_string())) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1580,7 +1580,7 @@ fn parse_replace_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1619,7 +1619,7 @@ fn parse_empty_row_insert() { explicit_row: false, rows: vec![vec![], vec![]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1681,7 +1681,7 @@ fn parse_insert_with_on_duplicate_update() { Expr::Value(Value::Boolean(true)), ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1946,6 +1946,7 @@ fn parse_delete_with_order_by() { }), asc: Some(false), nulls_first: None, + with_fill: None, }], order_by ); @@ -2331,7 +2332,7 @@ fn parse_substring_in_select() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -2639,7 +2640,7 @@ fn parse_hex_string_introducer() { into: None, connect_by: None, }))), - order_by: 
vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index ed17e9d8f..5ac421da0 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1159,7 +1159,7 @@ fn parse_copy_to() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -2491,7 +2491,7 @@ fn parse_array_subquery_expr() { connect_by: None, }))), }), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -4162,7 +4162,7 @@ fn test_simple_postgres_insert_with_alias() { Expr::Value(Value::Number("123".to_string(), false)) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -4231,7 +4231,7 @@ fn test_simple_postgres_insert_with_alias() { )) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -4296,7 +4296,7 @@ fn test_simple_insert_with_quoted_alias() { Expr::Value(Value::SingleQuotedString("0123".to_string())) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, From 028ada8350d3b2ada4aa67f5e828b318565590f2 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Sat, 20 Jul 2024 12:55:24 +0200 Subject: [PATCH 05/57] Support subquery expression in SET expressions (#1343) --- src/parser/mod.rs | 42 +++++++++++++++++++++++++-------------- tests/sqlparser_common.rs | 30 ++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fb15275e9..132e4f04e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1208,20 +1208,18 @@ impl<'a> Parser<'a> { Ok(Expr::Value(self.parse_value()?)) } Token::LParen => { - let expr = - if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { - self.prev_token(); - Expr::Subquery(self.parse_boxed_query()?) - } else if let Some(lambda) = self.try_parse_lambda() { - return Ok(lambda); - } else { - let exprs = self.parse_comma_separated(Parser::parse_expr)?; - match exprs.len() { - 0 => unreachable!(), // parse_comma_separated ensures 1 or more - 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), - _ => Expr::Tuple(exprs), - } - }; + let expr = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Some(lambda) = self.try_parse_lambda() { + return Ok(lambda); + } else { + let exprs = self.parse_comma_separated(Parser::parse_expr)?; + match exprs.len() { + 0 => unreachable!(), // parse_comma_separated ensures 1 or more + 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), + _ => Expr::Tuple(exprs), + } + }; self.expect_token(&Token::RParen)?; if !self.consume_token(&Token::Period) { Ok(expr) @@ -1263,6 +1261,18 @@ impl<'a> Parser<'a> { } } + fn try_parse_expr_sub_query(&mut self) -> Result, ParserError> { + if self + .parse_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) + .is_none() + { + return Ok(None); + } + self.prev_token(); + + Ok(Some(Expr::Subquery(self.parse_boxed_query()?))) + } + fn try_parse_lambda(&mut self) -> Option { if !self.dialect.supports_lambda_functions() { return None; @@ -8709,7 +8719,9 @@ impl<'a> Parser<'a> { let mut values = vec![]; loop { - let value = if let Ok(expr) = self.parse_expr() { + let value = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Ok(expr) = self.parse_expr() { expr } else { self.expected("variable value", self.peek_token())? 
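
A quick sketch of what this parser change enables end-to-end (the dialect
choice and table names below are illustrative, not taken from the patch):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // Right-hand sides of SET may now be parenthesized subqueries,
        // parsed as Expr::Subquery instead of failing on the inner SELECT.
        let sql = "SET (a, b) = ((SELECT 22 FROM tbl1), (SELECT 33 FROM tbl2))";
        let result = Parser::parse_sql(&GenericDialect {}, sql);
        println!("{result:?}");
    }
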
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 125e5f1f8..b1afdf28b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7135,9 +7135,39 @@ fn parse_set_variable() { _ => unreachable!(), } + // Subquery expression + for (sql, canonical) in [ + ( + "SET (a) = (SELECT 22 FROM tbl1)", + "SET (a) = ((SELECT 22 FROM tbl1))", + ), + ( + "SET (a) = (SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2))", + "SET (a) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)))", + ), + ( + "SET (a) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)))", + "SET (a) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)))", + ), + ( + "SET (a, b) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)), SELECT 33 FROM tbl3)", + "SET (a, b) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)), (SELECT 33 FROM tbl3))", + ), + ] { + multi_variable_dialects.one_statement_parses_to(sql, canonical); + } + let error_sqls = [ ("SET (a, b, c) = (1, 2, 3", "Expected: ), found: EOF"), ("SET (a, b, c) = 1, 2, 3", "Expected: (, found: 1"), + ( + "SET (a) = ((SELECT 22 FROM tbl1)", + "Expected: ), found: EOF", + ), + ( + "SET (a) = ((SELECT 22 FROM tbl1) (SELECT 22 FROM tbl1))", + "Expected: ), found: (", + ), ]; for (sql, error) in error_sqls { assert_eq!( From 71dc96658655e25288acdb9dc1d5c9d0f245016a Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Sun, 21 Jul 2024 14:02:12 +0400 Subject: [PATCH 06/57] Fix quoted identifier regression edge-case with "from" in SELECT (#1346) --- src/parser/mod.rs | 2 +- tests/sqlparser_common.rs | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 132e4f04e..175b02765 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10331,7 +10331,7 @@ impl<'a> Parser<'a> { Expr::Wildcard => Ok(SelectItem::Wildcard( self.parse_wildcard_additional_options()?, )), - Expr::Identifier(v) if v.value.to_lowercase() == "from" => { + Expr::Identifier(v) if v.value.to_lowercase() == "from" && v.quote_style.is_none() => { parser_err!( format!("Expected an expression, found: {}", v), self.peek_token().location diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index b1afdf28b..dbadb4813 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -9005,7 +9005,7 @@ fn parse_non_latin_identifiers() { #[test] fn parse_trailing_comma() { - // At the moment, Duck DB is the only dialect that allows + // At the moment, DuckDB is the only dialect that allows // trailing commas anywhere in the query let trailing_commas = TestedDialects { dialects: vec![Box::new(DuckDbDialect {})], @@ -9038,11 +9038,16 @@ fn parse_trailing_comma() { ); trailing_commas.verified_stmt("SELECT album_id, name FROM track"); - trailing_commas.verified_stmt("SELECT * FROM track ORDER BY milliseconds"); - trailing_commas.verified_stmt("SELECT DISTINCT ON (album_id) name FROM track"); + // check quoted "from" identifier edge-case + trailing_commas.one_statement_parses_to( + r#"SELECT "from", FROM "from""#, + r#"SELECT "from" FROM "from""#, + ); + trailing_commas.verified_stmt(r#"SELECT "from" FROM "from""#); + // doesn't allow any trailing commas let trailing_commas = TestedDialects { dialects: vec![Box::new(GenericDialect {})], From 48ea5640a221b91a93fad769f96cd2aa37932436 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Sun, 21 Jul 2024 20:18:50 +0800 Subject: [PATCH 07/57] Support Map literal syntax for DuckDB and Generic (#1344) --- src/ast/mod.rs | 42 ++++++++++++++ src/dialect/duckdb.rs | 7 +++ src/dialect/generic.rs | 4 ++ 
src/dialect/mod.rs | 5 ++ src/parser/mod.rs | 44 ++++++++++++++ tests/sqlparser_common.rs | 95 +++++++++++++++++++++++++++++++ tests/sqlparser_custom_dialect.rs | 22 +++++++ 7 files changed, 219 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2a519fc7c..cdc2e2049 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -329,6 +329,37 @@ impl fmt::Display for DictionaryField { } } +/// Represents a Map expression. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Map { + pub entries: Vec<MapEntry>, +} + +impl Display for Map { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "MAP {{{}}}", display_comma_separated(&self.entries)) + } +} + +/// A map field within a map. +/// +/// [duckdb]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MapEntry { + pub key: Box<Expr>, + pub value: Box<Expr>, +} + +impl fmt::Display for MapEntry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}: {}", self.key, self.value) + } +} + /// Options for `CAST` / `TRY_CAST` /// BigQuery: #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -764,6 +795,14 @@ pub enum Expr { /// ``` /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs Dictionary(Vec<DictionaryField>), + /// `DuckDB` specific `Map` literal expression [1] + /// + /// Syntax: + /// ```sql + /// syntax: Map {key1: value1[, ... ]} + /// ``` + /// [1]: https://duckdb.org/docs/sql/data_types/map#creating-maps + Map(Map), /// An access of nested data using subscript syntax, for example `array[2]`. Subscript { expr: Box<Expr>, @@ -1331,6 +1370,9 @@ impl fmt::Display for Expr { Expr::Dictionary(fields) => { write!(f, "{{{}}}", display_comma_separated(fields)) } + Expr::Map(map) => { + write!(f, "{map}") + } Expr::Subscript { expr, subscript: key, diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index c6edeac14..1fc211685 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -48,4 +48,11 @@ impl Dialect for DuckDbDialect { fn supports_dictionary_syntax(&self) -> bool { true } + + // DuckDB uses this syntax for `MAP`s. + // + // https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn support_map_literal_syntax(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 33391d479..8d762d780 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -70,4 +70,8 @@ impl Dialect for GenericDialect { fn supports_select_wildcard_except(&self) -> bool { true } + + fn support_map_literal_syntax(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index b223ead47..3ff7bb2a5 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -215,6 +215,11 @@ pub trait Dialect: Debug + Any { fn supports_dictionary_syntax(&self) -> bool { false } + /// Returns true if the dialect supports defining an object using + /// syntax like `Map {1: 10, 2: 20}`.
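+ /// DuckDB and the generic dialect opt in to this syntax; the grammar itself is handled by `Parser::parse_duckdb_map_literal`.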
+ fn support_map_literal_syntax(&self) -> bool { + false + } /// Returns true if the dialect supports lambda functions, for example: /// /// ```sql diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 175b02765..878cabfcc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1078,6 +1078,9 @@ impl<'a> Parser<'a> { let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?; Ok(Expr::Prior(Box::new(expr))) } + Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => { + self.parse_duckdb_map_literal() + } // Here `w` is a word, check if it's a part of a multipart // identifier, a function call, or a simple identifier: _ => match self.peek_token().token { @@ -2322,6 +2325,47 @@ impl<'a> Parser<'a> { }) } + /// DuckDB specific: Parse a duckdb [map] + /// + /// Syntax: + /// + /// ```sql + /// Map {key1: value1[, ... ]} + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn parse_duckdb_map_literal(&mut self) -> Result<Expr, ParserError> { + self.expect_token(&Token::LBrace)?; + + let fields = self.parse_comma_separated(Self::parse_duckdb_map_field)?; + + self.expect_token(&Token::RBrace)?; + + Ok(Expr::Map(Map { entries: fields })) + } + + /// Parse a field for a duckdb [map] + /// + /// Syntax + /// + /// ```sql + /// key: value + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn parse_duckdb_map_field(&mut self) -> Result<MapEntry, ParserError> { + let key = self.parse_expr()?; + + self.expect_token(&Token::Colon)?; + + let value = self.parse_expr()?; + + Ok(MapEntry { + key: Box::new(key), + value: Box::new(value), + }) + } + + /// Parse clickhouse [map] /// /// Syntax diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index dbadb4813..ac5098f58 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10077,6 +10077,101 @@ fn test_dictionary_syntax() { ) } +#[test] +fn test_map_syntax() { + fn check(sql: &str, expect: Expr) { + assert_eq!( + all_dialects_where(|d| d.support_map_literal_syntax()).verified_expr(sql), + expect + ); + } + + check( + "MAP {'Alberta': 'Edmonton', 'Manitoba': 'Winnipeg'}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("Alberta".to_owned()))), + value: Box::new(Expr::Value(Value::SingleQuotedString( "Edmonton".to_owned(), ))), + }, + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString( "Manitoba".to_owned(), ))), + value: Box::new(Expr::Value(Value::SingleQuotedString( "Winnipeg".to_owned(), ))), + }, + ], + }), + ); + + fn number_expr(s: &str) -> Expr { + Expr::Value(number(s)) + } + + check( + "MAP {1: 10.0, 2: 20.0}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(number_expr("1")), + value: Box::new(number_expr("10.0")), + }, + MapEntry { + key: Box::new(number_expr("2")), + value: Box::new(number_expr("20.0")), + }, + ], + }), + ); + + check( + "MAP {[1, 2, 3]: 10.0, [4, 5, 6]: 20.0}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Array(Array { + elem: vec![number_expr("1"), number_expr("2"), number_expr("3")], + named: false, + })), + value: Box::new(Expr::Value(number("10.0"))), + }, + MapEntry { + key: Box::new(Expr::Array(Array { + elem: vec![number_expr("4"), number_expr("5"), number_expr("6")], + named: false, + })), + value: Box::new(Expr::Value(number("20.0"))), + }, + ], + }), + ); + + check( + "MAP {'a': 10, 'b': 20}['a']", + Expr::Subscript { + expr: Box::new(Expr::Map(Map { + entries: vec![ + MapEntry { + key:
Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))), + value: Box::new(number_expr("10")), + }, + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("b".to_owned()))), + value: Box::new(number_expr("20")), + }, + ], + })), + subscript: Box::new(Subscript::Index { + index: Expr::Value(Value::SingleQuotedString("a".to_owned())), + }), + }, + ); +} + #[test] fn parse_within_group() { verified_expr("PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sales_amount)"); diff --git a/tests/sqlparser_custom_dialect.rs b/tests/sqlparser_custom_dialect.rs index 516591382..5b29047a4 100644 --- a/tests/sqlparser_custom_dialect.rs +++ b/tests/sqlparser_custom_dialect.rs @@ -125,6 +125,28 @@ fn custom_statement_parser() -> Result<(), ParserError> { Ok(()) } +#[test] +fn test_map_syntax_not_support_default() -> Result<(), ParserError> { + #[derive(Debug)] + struct MyDialect {} + + impl Dialect for MyDialect { + fn is_identifier_start(&self, ch: char) -> bool { + is_identifier_start(ch) + } + + fn is_identifier_part(&self, ch: char) -> bool { + is_identifier_part(ch) + } + } + + let dialect = MyDialect {}; + let sql = "SELECT MAP {1: 2}"; + let ast = Parser::parse_sql(&dialect, sql); + assert!(ast.is_err()); + Ok(()) +} + fn is_identifier_start(ch: char) -> bool { ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' } From b27abf00e2e67b28b25afc9da7c2ddd2a104c449 Mon Sep 17 00:00:00 2001 From: hulk Date: Tue, 23 Jul 2024 03:50:24 +0800 Subject: [PATCH 08/57] Allow to use `()` as the GROUP BY nothing (#1347) --- src/parser/mod.rs | 5 +++++ tests/sqlparser_common.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 878cabfcc..11fa9e4a9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1487,6 +1487,11 @@ impl<'a> Parser<'a> { let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; self.expect_token(&Token::RParen)?; Ok(Expr::Rollup(result)) + } else if self.consume_tokens(&[Token::LParen, Token::RParen]) { + // PostgreSQL allows using an empty tuple as a group by expression, + // e.g. `GROUP BY (), name`. See the GROUP BY Clause section in + // [PostgreSQL](https://www.postgresql.org/docs/16/sql-select.html) + Ok(Expr::Tuple(vec![])) } else { self.parse_expr() } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ac5098f58..dd3ed0515 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -42,6 +42,7 @@ mod test_utils; #[cfg(test)] use pretty_assertions::assert_eq; +use sqlparser::ast::Expr::Identifier; use sqlparser::test_utils::all_dialects_except; #[test] @@ -10278,3 +10279,30 @@ fn parse_auto_increment_too_large() { assert!(res.is_err(), "{res:?}"); } + +#[test] +fn test_group_by_nothing() { + let Select { group_by, .. } = all_dialects_where(|d| d.supports_group_by_expr()) + .verified_only_select("SELECT count(1) FROM t GROUP BY ()"); + { + std::assert_eq!( + GroupByExpr::Expressions(vec![Expr::Tuple(vec![])], vec![]), + group_by + ); + } + + let Select { group_by, ..
} = all_dialects_where(|d| d.supports_group_by_expr()) + .verified_only_select("SELECT name, count(1) FROM t GROUP BY name, ()"); + { + std::assert_eq!( + GroupByExpr::Expressions( + vec![ + Identifier(Ident::new("name".to_string())), + Expr::Tuple(vec![]) + ], + vec![] + ), + group_by + ); + } +} From 390d4d3554580f618c6d8edd177b875b849f326f Mon Sep 17 00:00:00 2001 From: hulk Date: Wed, 24 Jul 2024 00:41:07 +0800 Subject: [PATCH 09/57] Add support of MATERIALIZED/ALIAS/EPHEMERAL default column options for ClickHouse (#1348) --- src/ast/ddl.rs | 21 ++++++++ src/keywords.rs | 2 + src/parser/mod.rs | 18 +++++++ tests/sqlparser_clickhouse.rs | 96 +++++++++++++++++++++++++++++++++++ 4 files changed, 137 insertions(+) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 1ed3857d7..5cc671cf5 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -923,6 +923,18 @@ pub enum ColumnOption { NotNull, /// `DEFAULT <expr>` Default(Expr), + + /// ClickHouse supports `MATERIALIZED`, `EPHEMERAL` and `ALIAS` expressions to generate default values. + /// Syntax: `b INT MATERIALIZED (a + 1)` + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/create/table#default_values) + + /// `MATERIALIZED <expr>` + Materialized(Expr), + /// `EPHEMERAL [<expr>]` + Ephemeral(Option<Expr>), + /// `ALIAS <expr>` + Alias(Expr), + /// `{ PRIMARY KEY | UNIQUE } [<constraint_characteristics>]` Unique { is_primary: bool, @@ -978,6 +990,15 @@ impl fmt::Display for ColumnOption { Null => write!(f, "NULL"), NotNull => write!(f, "NOT NULL"), Default(expr) => write!(f, "DEFAULT {expr}"), + Materialized(expr) => write!(f, "MATERIALIZED {expr}"), + Ephemeral(expr) => { + if let Some(e) = expr { + write!(f, "EPHEMERAL {e}") + } else { + write!(f, "EPHEMERAL") + } + } + Alias(expr) => write!(f, "ALIAS {expr}"), Unique { is_primary, characteristics, diff --git a/src/keywords.rs index 2b6900fba..e59e49339 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -77,6 +77,7 @@ define_keywords!( AFTER, AGAINST, AGGREGATION, + ALIAS, ALL, ALLOCATE, ALTER, @@ -267,6 +268,7 @@ define_keywords!( ENFORCED, ENGINE, ENUM, + EPHEMERAL, EPOCH, EQUALS, ERROR, diff --git a/src/parser/mod.rs index 11fa9e4a9..f8267a7cb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5748,6 +5748,24 @@ impl<'a> Parser<'a> { Ok(Some(ColumnOption::Null)) } else if self.parse_keyword(Keyword::DEFAULT) { Ok(Some(ColumnOption::Default(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::MATERIALIZED) + { + Ok(Some(ColumnOption::Materialized(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::ALIAS) + { + Ok(Some(ColumnOption::Alias(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::EPHEMERAL) + { + // The expression is optional for the EPHEMERAL syntax, so we need to check + // if the column definition has remaining tokens before parsing the expression.
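+ // For example, in `c DATETIME EPHEMERAL, d STRING ALIAS toString(c)` the EPHEMERAL option of `c` carries no expression: the lookahead token is `,` (or `)` at the end of the column list).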
+ if matches!(self.peek_token().token, Token::Comma | Token::RParen) { + Ok(Some(ColumnOption::Ephemeral(None))) + } else { + Ok(Some(ColumnOption::Ephemeral(Some(self.parse_expr()?)))) + } } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { let characteristics = self.parse_constraint_characteristics()?; Ok(Some(ColumnOption::Unique { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 10d7d66ff..6fdadc366 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -493,6 +493,102 @@ fn parse_create_table_with_primary_key() { .expect_err("ORDER BY supports one expression with tuple"); } +#[test] +fn parse_create_table_with_variant_default_expressions() { + let sql = concat!( + "CREATE TABLE table (", + "a DATETIME MATERIALIZED now(),", + " b DATETIME EPHEMERAL now(),", + " c DATETIME EPHEMERAL,", + " d STRING ALIAS toString(c)", + ") ENGINE=MergeTree" + ); + match clickhouse_and_generic().verified_stmt(sql) { + Statement::CreateTable(CreateTable { columns, .. }) => { + assert_eq!( + columns, + vec![ + ColumnDef { + name: Ident::new("a"), + data_type: DataType::Datetime(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Materialized(Expr::Function(Function { + name: ObjectName(vec![Ident::new("now")]), + args: FunctionArguments::List(FunctionArgumentList { + args: vec![], + duplicate_treatment: None, + clauses: vec![], + }), + parameters: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + })) + }], + }, + ColumnDef { + name: Ident::new("b"), + data_type: DataType::Datetime(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Ephemeral(Some(Expr::Function(Function { + name: ObjectName(vec![Ident::new("now")]), + args: FunctionArguments::List(FunctionArgumentList { + args: vec![], + duplicate_treatment: None, + clauses: vec![], + }), + parameters: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + }))) + }], + }, + ColumnDef { + name: Ident::new("c"), + data_type: DataType::Datetime(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Ephemeral(None) + }], + }, + ColumnDef { + name: Ident::new("d"), + data_type: DataType::String(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Alias(Expr::Function(Function { + name: ObjectName(vec![Ident::new("toString")]), + args: FunctionArguments::List(FunctionArgumentList { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Identifier(Ident::new("c")) + ))], + duplicate_treatment: None, + clauses: vec![], + }), + parameters: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + })) + }], + } + ] + ) + } + _ => unreachable!(), + } +} + #[test] fn parse_create_view_with_fields_data_types() { match clickhouse().verified_stmt(r#"CREATE VIEW v (i "int", f "String") AS SELECT * FROM t"#) { From 1e82a145adcc090b2768814f19f23fd4d80267a5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 23 Jul 2024 12:56:55 -0400 Subject: [PATCH 10/57] Add CHANGELOG for 0.49.0 (#1350) --- CHANGELOG.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed5c9ecb4..cf2d1321b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,27 @@ changes that break via addition as "Added". 
## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. +## [0.49.0] 2024-07-23 +As always, huge props to @iffyio @jmhain and @lovasoa for their help reviewing and merging PRs! + +We are in the process of moving sqlparser to be governed as part of the Apache +DataFusion project: https://github.com/sqlparser-rs/sqlparser-rs/issues/1294 + +### Fixed +* Fix quoted identifier regression edge-case with "from" in SELECT (#1346) - Thanks @alexander-beedie +* Fix `AS` query clause should be after the create table options (#1339) - Thanks @git-hulk + +### Added + +* Support `MATERIALIZED`/`ALIAS`/`EPHEMERAL` default column options for ClickHouse (#1348) - Thanks @git-hulk +* Support `()` as the `GROUP BY` nothing (#1347) - Thanks @git-hulk +* Support Map literal syntax for DuckDB and Generic (#1344) - Thanks @goldmedal +* Support subquery expression in `SET` expressions (#1343) - Thanks @iffyio +* Support `WITH FILL` for ClickHouse (#1330) - Thanks @nickpresta +* Support `PARTITION BY` for PostgreSQL in `CREATE TABLE` statement (#1338) - Thanks @git-hulk +* Support of table function `WITH ORDINALITY` modifier for Postgres (#1337) - Thanks @git-hulk + + ## [0.48.0] 2024-07-09 Huge shout out to @iffyio @jmhain and @lovasoa for their help reviewing and merging PRs! From 6c64d43e1bbf4ebc78754c63560894f0d867bdac Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 23 Jul 2024 13:11:16 -0400 Subject: [PATCH 11/57] chore: Release sqlparser version 0.49.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b0bee003e..4c510a8c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.48.0" +version = "0.49.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 547d82f07de4480d236a061a41bfadac21235434 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 29 Jul 2024 14:49:05 +0200 Subject: [PATCH 12/57] fix CI clippy `1.80` warnings (#1357) --- src/keywords.rs | 2 +- src/test_utils.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/keywords.rs b/src/keywords.rs index e59e49339..4b599f12a 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -20,7 +20,7 @@ //! As a matter of fact, most of these keywords are not used at all //! and could be removed. //! 3) a `RESERVED_FOR_TABLE_ALIAS` array with keywords reserved in a -//! "table alias" context. +//! "table alias" context. #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; diff --git a/src/test_utils.rs index 1f5300be1..5ed6339bd 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -111,10 +111,10 @@ impl TestedDialects { /// that: /// /// 1. parsing `sql` results in the same [`Statement`] as parsing - /// `canonical`. + /// `canonical`. /// /// 2. re-serializing the result of parsing `sql` produces the same - /// `canonical` sql string + /// `canonical` sql string pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement { let mut statements = self.parse_sql_statements(sql).expect(sql); assert_eq!(statements.len(), 1); @@ -180,10 +180,10 @@ impl TestedDialects { /// Ensures that `sql` parses as a single [`Select`], and that additionally: /// /// 1. parsing `sql` results in the same [`Statement`] as parsing - /// `canonical`. + /// `canonical`. /// /// 2.
re-serializing the result of parsing `sql` produces the same - /// `canonical` sql string + /// `canonical` sql string pub fn verified_only_select_with_canonical(&self, query: &str, canonical: &str) -> Select { let q = match self.one_statement_parses_to(query, canonical) { Statement::Query(query) => *query, From 7fdb2ec5d195bebca887a1532c49ec38741eca1b Mon Sep 17 00:00:00 2001 From: hulk Date: Tue, 30 Jul 2024 05:16:29 +0800 Subject: [PATCH 13/57] Allow to use the TABLE keyword in DESC|DESCRIBE|EXPLAIN TABLE statement (#1351) --- src/ast/mod.rs | 9 +++++++++ src/parser/mod.rs | 3 +++ tests/sqlparser_common.rs | 42 ++++++++++++++++++++++++++----------- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index cdc2e2049..d27baadc4 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2699,6 +2699,11 @@ pub enum Statement { describe_alias: DescribeAlias, /// Hive style `FORMATTED | EXTENDED` hive_format: Option<HiveDescribeFormat>, + /// Snowflake and ClickHouse support `DESC|DESCRIBE TABLE <table_name>` syntax + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/desc-table.html) + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/describe-table) + has_table_keyword: bool, /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] table_name: ObjectName, @@ -2872,6 +2877,7 @@ impl fmt::Display for Statement { Statement::ExplainTable { describe_alias, hive_format, + has_table_keyword, table_name, } => { write!(f, "{describe_alias} ")?; if let Some(format) = hive_format { write!(f, "{} ", format)?; } + if *has_table_keyword { + write!(f, "TABLE ")?; + } write!(f, "{table_name}") } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f8267a7cb..931033f7b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7972,10 +7972,13 @@ impl<'a> Parser<'a> { _ => {} } + // only allow the TABLE keyword for a DESC|DESCRIBE statement + let has_table_keyword = self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name(false)?; Ok(Statement::ExplainTable { describe_alias, hive_format, + has_table_keyword, table_name, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index dd3ed0515..e68f25eb2 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -4186,31 +4186,49 @@ fn run_explain_analyze( #[test] fn parse_explain_table() { let validate_explain = - |query: &str, expected_describe_alias: DescribeAlias| match verified_stmt(query) { - Statement::ExplainTable { - describe_alias, - hive_format, - table_name, - } => { - assert_eq!(describe_alias, expected_describe_alias); - assert_eq!(hive_format, None); - assert_eq!("test_identifier", table_name.to_string()); + |query: &str, expected_describe_alias: DescribeAlias, expected_table_keyword| { + match verified_stmt(query) { + Statement::ExplainTable { + describe_alias, + hive_format, + has_table_keyword, + table_name, + } => { + assert_eq!(describe_alias, expected_describe_alias); + assert_eq!(hive_format, None); + assert_eq!(has_table_keyword, expected_table_keyword); + assert_eq!("test_identifier", table_name.to_string()); + } + _ => panic!("Unexpected Statement, must be ExplainTable"), } - _ => panic!("Unexpected Statement, must be ExplainTable"), }; - validate_explain("EXPLAIN test_identifier", DescribeAlias::Explain); - validate_explain("DESCRIBE test_identifier", DescribeAlias::Describe); + validate_explain("EXPLAIN test_identifier", DescribeAlias::Explain, false);
+ validate_explain("DESCRIBE test_identifier", DescribeAlias::Describe, false); + validate_explain("DESC test_identifier", DescribeAlias::Desc, false); + validate_explain( + "EXPLAIN TABLE test_identifier", + DescribeAlias::Explain, + true, + ); + validate_explain( + "DESCRIBE TABLE test_identifier", + DescribeAlias::Describe, + true, + ); + validate_explain("DESC TABLE test_identifier", DescribeAlias::Desc, true); } #[test] fn explain_describe() { verified_stmt("DESCRIBE test.table"); + verified_stmt("DESCRIBE TABLE test.table"); } #[test] fn explain_desc() { verified_stmt("DESC test.table"); + verified_stmt("DESC TABLE test.table"); } #[test] From c3ba2f33c6f52ce4dfea87bae9e77460db8f917f Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Mon, 29 Jul 2024 14:17:11 -0700 Subject: [PATCH 14/57] Snowflake: support position with normal function call syntax (#1341) Co-authored-by: Ifeanyi Ubah --- src/parser/mod.rs | 30 ++++++++++++++++-------------- tests/sqlparser_common.rs | 30 +++++++++++++++++------------- tests/sqlparser_snowflake.rs | 6 ++++++ 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 931033f7b..b3120bb30 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1038,7 +1038,7 @@ impl<'a> Parser<'a> { Keyword::CEIL => self.parse_ceil_floor_expr(true), Keyword::FLOOR => self.parse_ceil_floor_expr(false), Keyword::POSITION if self.peek_token().token == Token::LParen => { - self.parse_position_expr() + self.parse_position_expr(w.to_ident()) } Keyword::SUBSTRING => self.parse_substring_expr(), Keyword::OVERLAY => self.parse_overlay_expr(), @@ -1707,24 +1707,26 @@ impl<'a> Parser<'a> { } } - pub fn parse_position_expr(&mut self) -> Result { - // PARSE SELECT POSITION('@' in field) - self.expect_token(&Token::LParen)?; + pub fn parse_position_expr(&mut self, ident: Ident) -> Result { + let position_expr = self.maybe_parse(|p| { + // PARSE SELECT POSITION('@' in field) + p.expect_token(&Token::LParen)?; - // Parse the subexpr till the IN keyword - let expr = self.parse_subexpr(Self::BETWEEN_PREC)?; - if self.parse_keyword(Keyword::IN) { - let from = self.parse_expr()?; - self.expect_token(&Token::RParen)?; + // Parse the subexpr till the IN keyword + let expr = p.parse_subexpr(Self::BETWEEN_PREC)?; + p.expect_keyword(Keyword::IN)?; + let from = p.parse_expr()?; + p.expect_token(&Token::RParen)?; Ok(Expr::Position { expr: Box::new(expr), r#in: Box::new(from), }) - } else { - parser_err!( - "Position function must include IN keyword".to_string(), - self.peek_token().location - ) + }); + match position_expr { + Some(expr) => Ok(expr), + // Snowflake supports `position` as an ordinary function call + // without the special `IN` syntax. 
+ None => self.parse_function(ObjectName(vec![ident])), } } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e68f25eb2..5de76f78f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -4151,7 +4151,7 @@ fn parse_scalar_function_in_projection() { for function_name in names { // like SELECT sqrt(id) FROM foo - let sql = dbg!(format!("SELECT {function_name}(id) FROM foo")); + let sql = format!("SELECT {function_name}(id) FROM foo"); let select = verified_only_select(&sql); assert_eq!( &call(function_name, [Expr::Identifier(Ident::new("id"))]), @@ -8254,30 +8254,34 @@ fn parse_time_functions() { #[test] fn parse_position() { - let sql = "SELECT POSITION('@' IN field)"; - let select = verified_only_select(sql); assert_eq!( - &Expr::Position { + Expr::Position { expr: Box::new(Expr::Value(Value::SingleQuotedString("@".to_string()))), r#in: Box::new(Expr::Identifier(Ident::new("field"))), }, - expr_from_projection(only(&select.projection)) + verified_expr("POSITION('@' IN field)"), ); -} -#[test] -fn parse_position_negative() { - let sql = "SELECT POSITION(foo) from bar"; - let res = parse_sql_statements(sql); + // some dialects (e.g. snowflake) support position as a function call (i.e. without IN) assert_eq!( - ParserError::ParserError("Position function must include IN keyword".to_string()), - res.unwrap_err() + call( + "position", + [ + Expr::Value(Value::SingleQuotedString("an".to_owned())), + Expr::Value(Value::SingleQuotedString("banana".to_owned())), + Expr::Value(number("1")), + ] + ), + verified_expr("position('an', 'banana', 1)") ); +} +#[test] +fn parse_position_negative() { let sql = "SELECT POSITION(foo IN) from bar"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected: an expression:, found: )".to_string()), + ParserError::ParserError("Expected: (, found: )".to_string()), res.unwrap_err() ); } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 7a2288cbb..7abb1a947 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2256,3 +2256,9 @@ fn asof_joins() { "ORDER BY s.observed", )); } + +#[test] +fn test_parse_position() { + snowflake().verified_query("SELECT position('an', 'banana', 1)"); + snowflake().verified_query("SELECT n, h, POSITION(n IN h) FROM pos"); +} From bc15f7b4ceab849a974e84fcd38bde353cb7c2d1 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 29 Jul 2024 23:18:16 +0200 Subject: [PATCH 15/57] Support for postgres String Constants with Unicode Escapes (#1355) --- src/ast/value.rs | 40 +++++++++++++++++++ src/dialect/generic.rs | 4 ++ src/dialect/mod.rs | 15 +++++++ src/dialect/postgresql.rs | 4 ++ src/parser/mod.rs | 7 ++++ src/tokenizer.rs | 78 +++++++++++++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 32 +++++++++++++++ 7 files changed, 180 insertions(+) diff --git a/src/ast/value.rs b/src/ast/value.rs index 4c1a56a92..17cdb839d 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -52,6 +52,10 @@ pub enum Value { /// See [Postgres docs](https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS) /// for more details. EscapedStringLiteral(String), + /// u&'string value' (postgres extension) + /// See [Postgres docs](https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE) + /// for more details. 
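+ /// For example, `U&'\0441\043B\043E\043D'` denotes the string `слон`.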
+ UnicodeStringLiteral(String), /// B'string value' SingleQuotedByteStringLiteral(String), /// B"string value" @@ -102,6 +106,7 @@ impl fmt::Display for Value { } Value::DollarQuotedString(v) => write!(f, "{v}"), Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), + Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{v}'"), Value::HexStringLiteral(v) => write!(f, "X'{v}'"), Value::Boolean(v) => write!(f, "{v}"), @@ -347,6 +352,41 @@ pub fn escape_escaped_string(s: &str) -> EscapeEscapedStringLiteral<'_> { EscapeEscapedStringLiteral(s) } +pub struct EscapeUnicodeStringLiteral<'a>(&'a str); + +impl<'a> fmt::Display for EscapeUnicodeStringLiteral<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for c in self.0.chars() { + match c { + '\'' => { + write!(f, "''")?; + } + '\\' => { + write!(f, r#"\\"#)?; + } + x if x.is_ascii() => { + write!(f, "{}", c)?; + } + _ => { + let codepoint = c as u32; + // if the character fits in 16 bits, we can use the \XXXX format + // otherwise, we need to use the \+XXXXXX format + if codepoint <= 0xFFFF { + write!(f, "\\{:04X}", codepoint)?; + } else { + write!(f, "\\+{:06X}", codepoint)?; + } + } + } + } + Ok(()) + } +} + +pub fn escape_unicode_string(s: &str) -> EscapeUnicodeStringLiteral<'_> { + EscapeUnicodeStringLiteral(s) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/dialect/generic.rs index 8d762d780..2777dfb02 ... + fn supports_unicode_string_literal(&self) -> bool { + true + } + fn supports_group_by_expr(&self) -> bool { true } diff --git a/src/dialect/mod.rs index 3ff7bb2a5..22e0baeb2 ... fn supports_string_literal_backslash_escape(&self) -> bool { false } + + /// Determine if the dialect supports string literals with `U&` prefix. + /// This is used to specify Unicode code points in string literals. + /// For example, in PostgreSQL, the following is a valid string literal: + /// ```sql + /// SELECT U&'\0061\0062\0063'; + /// ``` + /// This is equivalent to the string literal `'abc'`. + /// See + /// - [Postgres docs](https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE) + /// - [H2 docs](http://www.h2database.com/html/grammar.html#string) + fn supports_unicode_string_literal(&self) -> bool { + false + } + /// Does the dialect support `FILTER (WHERE expr)` for aggregate queries?
fn supports_filter_during_aggregation(&self) -> bool { false diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 0e04bfa27..8254e807b 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -40,6 +40,10 @@ impl Dialect for PostgreSqlDialect { ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_' } + fn supports_unicode_string_literal(&self) -> bool { + true + } + /// See fn is_custom_operator_part(&self, ch: char) -> bool { matches!( diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b3120bb30..2b1c1ab7f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1191,6 +1191,10 @@ impl<'a> Parser<'a> { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) } + Token::UnicodeStringLiteral(_) => { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) + } Token::Number(_, _) | Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) @@ -1868,6 +1872,7 @@ impl<'a> Parser<'a> { } Token::SingleQuotedString(_) | Token::EscapedStringLiteral(_) + | Token::UnicodeStringLiteral(_) | Token::NationalStringLiteral(_) | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), _ => self.expected( @@ -6965,6 +6970,7 @@ impl<'a> Parser<'a> { } Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())), Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())), + Token::UnicodeStringLiteral(ref s) => Ok(Value::UnicodeStringLiteral(s.to_string())), Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), Token::Placeholder(ref s) => Ok(Value::Placeholder(s.to_string())), tok @ Token::Colon | tok @ Token::AtSign => { @@ -7056,6 +7062,7 @@ impl<'a> Parser<'a> { Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { Ok(s) } + Token::UnicodeStringLiteral(s) => Ok(s), _ => self.expected("literal string", next_token), } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index b8336cec8..be11a3140 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -94,6 +94,8 @@ pub enum Token { NationalStringLiteral(String), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' EscapedStringLiteral(String), + /// Unicode string literal: i.e: U&'first \000A second' + UnicodeStringLiteral(String), /// Hexadecimal string literal: i.e.: X'deadbeef' HexStringLiteral(String), /// Comma @@ -251,6 +253,7 @@ impl fmt::Display for Token { Token::DollarQuotedString(ref s) => write!(f, "{s}"), Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"), Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"), + Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"), Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"), Token::SingleQuotedByteStringLiteral(ref s) => write!(f, "B'{s}'"), Token::TripleSingleQuotedByteStringLiteral(ref s) => write!(f, "B'''{s}'''"), @@ -794,6 +797,23 @@ impl<'a> Tokenizer<'a> { } } } + // Unicode string literals like U&'first \000A second' are supported in some dialects, including PostgreSQL + x @ 'u' | x @ 'U' if self.dialect.supports_unicode_string_literal() => { + chars.next(); // consume, to check the next char + if chars.peek() == Some(&'&') { + // we cannot advance the iterator here, as we need to consume the '&' later if the 'u' was an identifier + let mut chars_clone = chars.peekable.clone(); + chars_clone.next(); // consume the '&' in the clone + if chars_clone.peek() == Some(&'\'') { + chars.next(); // consume the '&' in the original 
iterator + let s = unescape_unicode_single_quoted_string(chars)?; + return Ok(Some(Token::UnicodeStringLiteral(s))); + } + } + // regular identifier starting with an "U" or "u" + let s = self.tokenize_word(x, chars); + Ok(Some(Token::make_word(&s, None))) + } // The spec only allows an uppercase 'X' to introduce a hex // string, but PostgreSQL, at least, allows a lowercase 'x' too. x @ 'x' | x @ 'X' => { @@ -1797,6 +1817,64 @@ impl<'a: 'b, 'b> Unescape<'a, 'b> { } } +fn unescape_unicode_single_quoted_string(chars: &mut State<'_>) -> Result<String, TokenizerError> { + let mut unescaped = String::new(); + chars.next(); // consume the opening quote + while let Some(c) = chars.next() { + match c { + '\'' => { + if chars.peek() == Some(&'\'') { + chars.next(); + unescaped.push('\''); + } else { + return Ok(unescaped); + } + } + '\\' => match chars.peek() { + Some('\\') => { + chars.next(); + unescaped.push('\\'); + } + Some('+') => { + chars.next(); + unescaped.push(take_char_from_hex_digits(chars, 6)?); + } + _ => unescaped.push(take_char_from_hex_digits(chars, 4)?), + }, + _ => { + unescaped.push(c); + } + } + } + Err(TokenizerError { + message: "Unterminated unicode encoded string literal".to_string(), + location: chars.location(), + }) +} + +fn take_char_from_hex_digits( + chars: &mut State<'_>, + max_digits: usize, +) -> Result<char, TokenizerError> { + let mut result = 0u32; + for _ in 0..max_digits { + let next_char = chars.next().ok_or_else(|| TokenizerError { + message: "Unexpected EOF while parsing hex digit in escaped unicode string." + .to_string(), + location: chars.location(), + })?; + let digit = next_char.to_digit(16).ok_or_else(|| TokenizerError { + message: format!("Invalid hex digit in escaped unicode string: {}", next_char), + location: chars.location(), + })?; + result = result * 16 + digit; + } + char::from_u32(result).ok_or_else(|| TokenizerError { + message: format!("Invalid unicode character: {:x}", result), + location: chars.location(), + }) +} + #[cfg(test)] mod tests { use super::*; diff --git a/tests/sqlparser_postgres.rs index 5ac421da0..44231e7d3 ... @@ -4441,3 +4441,35 @@ fn test_table_unnest_with_ordinality() { _ => panic!("Expecting TableFactor::UNNEST with ordinality"), } } + +#[test] +fn test_escaped_string_literal() { + match pg().verified_expr(r#"E'\n'"#) { + Expr::Value(Value::EscapedStringLiteral(s)) => { + assert_eq!("\n", s); + } + _ => unreachable!(), + } +} + +#[test] +fn test_unicode_string_literal() { + let pairs = [ + // Example from the postgres docs + (r#"U&'\0441\043B\043E\043D'"#, "слон"), + // High unicode code point (> 0xFFFF) + (r#"U&'\+01F418'"#, "🐘"), + // Escaped backslash + (r#"U&'\\'"#, r#"\"#), + // Escaped single quote + (r#"U&''''"#, "'"), + ]; + for (input, expected) in pairs { + match pg_and_generic().verified_expr(input) { + Expr::Value(Value::UnicodeStringLiteral(s)) => { + assert_eq!(expected, s); + } + _ => unreachable!(), + } + } +} From f96658006f85b1e88cc112a36584391c01ee766d Mon Sep 17 00:00:00 2001 From: hulk Date: Wed, 31 Jul 2024 04:30:46 +0800 Subject: [PATCH 16/57] Allow to use the GLOBAL keyword before the join operator (#1353) --- src/ast/query.rs | 7 ++++ src/keywords.rs | 1 + src/parser/mod.rs | 5 +++ src/test_utils.rs | 1 + tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_common.rs | 71 ++++++++++++++++++++++++++++---------- tests/sqlparser_mysql.rs | 1 + tests/sqlparser_postgres.rs | 1 + tests/sqlparser_snowflake.rs | 1 + 9 files changed, 81 insertions(+), 8
deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 978604266..b318f686a 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1537,6 +1537,9 @@ impl Display for TableVersion { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Join { pub relation: TableFactor, + /// ClickHouse supports the optional `GLOBAL` keyword before the join operator. + /// See [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/join) + pub global: bool, pub join_operator: JoinOperator, } @@ -1563,6 +1566,10 @@ impl fmt::Display for Join { } Suffix(constraint) } + if self.global { + write!(f, " GLOBAL")?; + } + match &self.join_operator { JoinOperator::Inner(constraint) => write!( f, diff --git a/src/keywords.rs b/src/keywords.rs index 4b599f12a..ee2bd6173 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -850,6 +850,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::USING, Keyword::CLUSTER, Keyword::DISTRIBUTE, + Keyword::GLOBAL, // for MSSQL-specific OUTER APPLY (seems reserved in most dialects) Keyword::OUTER, Keyword::SET, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2b1c1ab7f..cd2cf2186 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9015,6 +9015,7 @@ impl<'a> Parser<'a> { // a table alias. let mut joins = vec![]; loop { + let global = self.parse_keyword(Keyword::GLOBAL); let join = if self.parse_keyword(Keyword::CROSS) { let join_operator = if self.parse_keyword(Keyword::JOIN) { JoinOperator::CrossJoin @@ -9026,6 +9027,7 @@ impl<'a> Parser<'a> { }; Join { relation: self.parse_table_factor()?, + global, join_operator, } } else if self.parse_keyword(Keyword::OUTER) { @@ -9033,6 +9035,7 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::APPLY)?; Join { relation: self.parse_table_factor()?, + global, join_operator: JoinOperator::OuterApply, } } else if self.parse_keyword(Keyword::ASOF) { @@ -9042,6 +9045,7 @@ impl<'a> Parser<'a> { let match_condition = self.parse_parenthesized(Self::parse_expr)?; Join { relation, + global, join_operator: JoinOperator::AsOf { match_condition, constraint: self.parse_join_constraint(false)?, @@ -9127,6 +9131,7 @@ impl<'a> Parser<'a> { let join_constraint = self.parse_join_constraint(natural)?; Join { relation, + global, join_operator: join_operator_type(join_constraint), } }; diff --git a/src/test_utils.rs b/src/test_utils.rs index 5ed6339bd..b8e9ecee4 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -331,6 +331,7 @@ pub fn table_with_alias(name: impl Into, alias: impl Into) -> Ta pub fn join(relation: TableFactor) -> Join { Join { relation, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 089a41889..a0dd5a662 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1557,6 +1557,7 @@ fn parse_join_constraint_unnest_alias() { with_offset_alias: None, with_ordinality: false, }, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), op: BinaryOperator::Eq, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 5de76f78f..a8f3919df 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5600,6 +5600,7 @@ fn parse_implicit_join() { partitions: vec![], with_ordinality: false, }, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], }, @@ -5623,6 +5624,7 @@ fn parse_implicit_join() { partitions: vec![], 
with_ordinality: false, }, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], }, @@ -5646,6 +5648,7 @@ fn parse_cross_join() { partitions: vec![], with_ordinality: false, }, + global: false, join_operator: JoinOperator::CrossJoin, }, only(only(select.from).joins), @@ -5657,6 +5660,7 @@ fn parse_joins_on() { fn join_with_constraint( relation: impl Into<String>, alias: Option<TableAlias>, + global: bool, f: impl Fn(JoinConstraint) -> JoinOperator, ) -> Join { Join { relation: TableFactor::Table { name: ObjectName(vec![Ident::new(relation)]), alias, args: None, with_hints: vec![], version: None, partitions: vec![], with_ordinality: false, }, + global, join_operator: f(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), op: BinaryOperator::Eq, right: Box::new(Expr::Identifier("c2".into())), })), } } assert_eq!( vec![join_with_constraint( "t2", table_alias("foo"), + false, JoinOperator::Inner, )] ); @@ -5692,35 +5698,80 @@ // Test parsing of different join operators assert_eq!( only(&verified_only_select("SELECT * FROM t1 JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::Inner)] + vec![join_with_constraint("t2", None, false, JoinOperator::Inner)] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 LEFT JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::LeftOuter + )] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 RIGHT JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::RightOuter)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::RightOuter + )] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 LEFT SEMI JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::LeftSemi)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::LeftSemi + )] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 RIGHT SEMI JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::RightSemi)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::RightSemi + )] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 LEFT ANTI JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::LeftAnti)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::LeftAnti + )] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 RIGHT ANTI JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::RightAnti)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::RightAnti + )] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 FULL JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::FullOuter)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::FullOuter + )] + ); + + assert_eq!( + only(&verified_only_select("SELECT * FROM t1 GLOBAL FULL JOIN t2 ON c1 = c2").from).joins, + vec![join_with_constraint( + "t2", + None, + true, + JoinOperator::FullOuter + )] ); } @@ -5741,6 +5792,7 @@ fn parse_joins_using() { partitions: vec![], with_ordinality: false, }, + global: false, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } } @@ -5805,6 +5857,7 @@ fn parse_natural_join() { partitions: vec![], with_ordinality: false, }, + global: false, join_operator: f(JoinConstraint::Natural), } } @@ -6073,6 +6126,7 @@ fn parse_derived_tables() { partitions: vec![], with_ordinality: false, },
+ global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], }), @@ -6983,6 +7037,7 @@ fn lateral_function() { ], alias: None, }, + global: false, join_operator: JoinOperator::LeftOuter(JoinConstraint::None), }], }], diff --git a/tests/sqlparser_mysql.rs index b0b29f347..1c9c009d9 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1891,6 +1891,7 @@ fn parse_update_with_joins() { partitions: vec![], with_ordinality: false, }, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ Ident::new("o"), diff --git a/tests/sqlparser_postgres.rs index 44231e7d3..6410199ab 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4102,6 +4102,7 @@ fn parse_join_constraint_unnest_alias() { with_offset_alias: None, with_ordinality: false, }, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), op: BinaryOperator::Eq, diff --git a/tests/sqlparser_snowflake.rs index 7abb1a947..eaf8c1d14 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2206,6 +2206,7 @@ fn asof_joins() { relation: table_with_alias("trades_unixtime", "tu"), joins: vec![Join { relation: table_with_alias("quotes_unixtime", "qu"), + global: false, join_operator: JoinOperator::AsOf { match_condition: Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ From cc13841a370190df00cfc623593453054ac187e9 Mon Sep 17 00:00:00 2001 From: hulk Date: Wed, 31 Jul 2024 04:31:42 +0800 Subject: [PATCH 17/57] Add support of parsing ON CLUSTER in ALTER TABLE for ClickHouse (#1342) --- src/ast/dml.rs | 8 ++---- src/ast/helpers/stmt_create_table.rs | 4 +-- src/ast/mod.rs | 11 +++++++- src/parser/mod.rs | 21 +++++++-------- src/test_utils.rs | 1 + tests/sqlparser_common.rs | 38 ++++++++++++++++++++++++++-- tests/sqlparser_mysql.rs | 3 +++ tests/sqlparser_postgres.rs | 2 ++ 8 files changed, 67 insertions(+), 21 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 0ebbaa3e9..aad7d2e22 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -126,7 +126,7 @@ pub struct CreateTable { pub on_commit: Option<OnCommit>, /// ClickHouse "ON CLUSTER" clause: /// - pub on_cluster: Option<String>, + pub on_cluster: Option<Ident>, /// ClickHouse "PRIMARY KEY <expr>" clause.
/// pub primary_key: Option<Box<Expr>>, @@ -206,11 +206,7 @@ impl Display for CreateTable { name = self.name, )?; if let Some(on_cluster) = &self.on_cluster { - write!( - f, - " ON CLUSTER {}", - on_cluster.replace('{', "'{").replace('}', "}'") - )?; + write!(f, " ON CLUSTER {}", on_cluster)?; } if !self.columns.is_empty() || !self.constraints.is_empty() { write!(f, " ({}", display_comma_separated(&self.columns))?; diff --git a/src/ast/helpers/stmt_create_table.rs index 92c75e6a4..19efaeece 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -73,7 +73,7 @@ pub struct CreateTableBuilder { pub default_charset: Option<String>, pub collation: Option<String>, pub on_commit: Option<OnCommit>, - pub on_cluster: Option<String>, + pub on_cluster: Option<Ident>, pub primary_key: Option<Box<Expr>>, pub order_by: Option<OneOrManyWithParens<Expr>>, pub partition_by: Option<Box<Expr>>, @@ -261,7 +261,7 @@ impl CreateTableBuilder { self } - pub fn on_cluster(mut self, on_cluster: Option<String>) -> Self { + pub fn on_cluster(mut self, on_cluster: Option<Ident>) -> Self { self.on_cluster = on_cluster; self } diff --git a/src/ast/mod.rs index d27baadc4..70f96c5c5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2162,6 +2162,10 @@ pub enum Statement { only: bool, operations: Vec<AlterTableOperation>, location: Option<HiveSetLocation>, + /// ClickHouse dialect supports `ON CLUSTER` clause for ALTER TABLE + /// For example: `ALTER TABLE table_name ON CLUSTER cluster_name ADD COLUMN c UInt32` + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/update) + on_cluster: Option<Ident>, }, /// ```sql /// ALTER INDEX @@ -3632,6 +3636,7 @@ impl fmt::Display for Statement { only, operations, location, + on_cluster, } => { write!(f, "ALTER TABLE ")?; if *if_exists { write!(f, "IF EXISTS ")?; } if *only { write!(f, "ONLY ")?; } + write!(f, "{name} ", name = name)?; + if let Some(cluster) = on_cluster { + write!(f, "ON CLUSTER {cluster} ")?; + } write!( f, - "{name} {operations}", + "{operations}", operations = display_comma_separated(operations) )?; if let Some(loc) = location { diff --git a/src/parser/mod.rs index cd2cf2186..725e24bfb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5379,6 +5379,14 @@ impl<'a> Parser<'a> { } } + fn parse_optional_on_cluster(&mut self) -> Result<Option<Ident>, ParserError> { + if self.parse_keywords(&[Keyword::ON, Keyword::CLUSTER]) { + Ok(Some(self.parse_identifier(false)?)) + } else { + Ok(None) + } + } + pub fn parse_create_table( &mut self, or_replace: bool, @@ -5391,16 +5399,7 @@ impl<'a> Parser<'a> { let table_name = self.parse_object_name(allow_unquoted_hyphen)?; // Clickhouse has `ON CLUSTER 'cluster'` syntax for DDLs - let on_cluster = if self.parse_keywords(&[Keyword::ON, Keyword::CLUSTER]) { - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(s) => Some(s), - Token::Word(s) => Some(s.to_string()), - _ => self.expected("identifier or cluster literal", next_token)?, - } - } else { - None - }; + let on_cluster = self.parse_optional_on_cluster()?; let like = if self.parse_keyword(Keyword::LIKE) || self.parse_keyword(Keyword::ILIKE) { self.parse_object_name(allow_unquoted_hyphen).ok() @@ -6583 ... let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ] let table_name = self.parse_object_name(false)?; + let on_cluster = self.parse_optional_on_cluster()?; let operations =
self.parse_comma_separated(Parser::parse_alter_table_operation)?; let mut location = None; @@ -6604,6 +6604,7 @@ impl<'a> Parser<'a> { only, operations, location, + on_cluster, }) } Keyword::INDEX => { diff --git a/src/test_utils.rs b/src/test_utils.rs index b8e9ecee4..d9100d351 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -274,6 +274,7 @@ pub fn alter_table_op_with_name(stmt: Statement, expected_name: &str) -> AlterTa if_exists, only: is_only, operations, + on_cluster: _, location: _, } => { assert_eq!(name.to_string(), expected_name); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a8f3919df..44e245254 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3506,7 +3506,7 @@ fn parse_create_table_on_cluster() { let sql = "CREATE TABLE t ON CLUSTER '{cluster}' (a INT, b INT)"; match generic.verified_stmt(sql) { Statement::CreateTable(CreateTable { on_cluster, .. }) => { - assert_eq!(on_cluster.unwrap(), "{cluster}".to_string()); + assert_eq!(on_cluster.unwrap().to_string(), "'{cluster}'".to_string()); } _ => unreachable!(), } @@ -3515,7 +3515,7 @@ fn parse_create_table_on_cluster() { let sql = "CREATE TABLE t ON CLUSTER my_cluster (a INT, b INT)"; match generic.verified_stmt(sql) { Statement::CreateTable(CreateTable { on_cluster, .. }) => { - assert_eq!(on_cluster.unwrap(), "my_cluster".to_string()); + assert_eq!(on_cluster.unwrap().to_string(), "my_cluster".to_string()); } _ => unreachable!(), } @@ -3822,6 +3822,40 @@ fn parse_alter_table() { } } +#[test] +fn test_alter_table_with_on_cluster() { + match all_dialects() + .verified_stmt("ALTER TABLE t ON CLUSTER 'cluster' ADD CONSTRAINT bar PRIMARY KEY (baz)") + { + Statement::AlterTable { + name, on_cluster, .. + } => { + std::assert_eq!(name.to_string(), "t"); + std::assert_eq!(on_cluster, Some(Ident::with_quote('\'', "cluster"))); + } + _ => unreachable!(), + } + + match all_dialects() + .verified_stmt("ALTER TABLE t ON CLUSTER cluster_name ADD CONSTRAINT bar PRIMARY KEY (baz)") + { + Statement::AlterTable { + name, on_cluster, .. 
+ } => { std::assert_eq!(name.to_string(), "t"); + std::assert_eq!(on_cluster, Some(Ident::new("cluster_name"))); + } + _ => unreachable!(), + } + + let res = all_dialects() .parse_sql_statements("ALTER TABLE t ON CLUSTER 123 ADD CONSTRAINT bar PRIMARY KEY (baz)"); + std::assert_eq!( res.unwrap_err(), ParserError::ParserError("Expected: identifier, found: 123".to_string()) ) +} + #[test] fn parse_alter_index() { let rename_index = "ALTER INDEX idx RENAME TO new_idx"; diff --git a/tests/sqlparser_mysql.rs index 1c9c009d9..397a722b5 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1976,6 +1976,7 @@ fn parse_alter_table_add_column() { only, operations, location: _, + on_cluster: _, } => { assert_eq!(name.to_string(), "tab"); assert!(!if_exists); @@ -2005,6 +2006,7 @@ fn parse_alter_table_add_column() { only, operations, location: _, + on_cluster: _, } => { assert_eq!(name.to_string(), "tab"); assert!(!if_exists); @@ -2042,6 +2044,7 @@ fn parse_alter_table_add_columns() { only, operations, location: _, + on_cluster: _, } => { assert_eq!(name.to_string(), "tab"); assert!(!if_exists); diff --git a/tests/sqlparser_postgres.rs index 6410199ab..7406bdd74 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -677,6 +677,7 @@ fn parse_alter_table_add_columns() { only, operations, location: _, + on_cluster: _, } => { assert_eq!(name.to_string(), "tab"); assert!(if_exists); @@ -759,6 +760,7 @@ fn parse_alter_table_owner_to() { only: _, operations, location: _, + on_cluster: _, } => { assert_eq!(name.to_string(), "tab"); assert_eq!( From a692ba5fd1902e0c40dc5714304594aee642a899 Mon Sep 17 00:00:00 2001 From: hulk Date: Fri, 2 Aug 2024 05:20:56 +0800 Subject: [PATCH 18/57] Add support of parsing OPTIMIZE TABLE statement for ClickHouse (#1359) --- src/ast/ddl.rs | 41 +++++++++++++++++++----- src/ast/mod.rs | 36 ++++++++++++++++++++- src/keywords.rs | 3 ++ src/parser/mod.rs | 45 +++++++++++++++++++++++++- tests/sqlparser_clickhouse.rs | 60 +++++++++++++++++++++++++++++++++++ 5 files changed, 175 insertions(+), 10 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 5cc671cf5..af679d469 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -1296,20 +1296,45 @@ impl fmt::Display for UserDefinedTypeCompositeAttributeDef { } } -/// PARTITION statement used in ALTER TABLE et al. such as in Hive SQL +/// PARTITION statement used in ALTER TABLE et al. such as in Hive and ClickHouse SQL. +/// For example, ClickHouse's OPTIMIZE TABLE supports syntax like PARTITION ID 'partition_id' and PARTITION expr. +/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct Partition { - pub partitions: Vec<Expr>, +pub enum Partition { + Identifier(Ident), + Expr(Expr), + Partitions(Vec<Expr>), } impl fmt::Display for Partition { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "PARTITION ({})", - display_comma_separated(&self.partitions) - ) + match self { + Partition::Identifier(id) => write!(f, "PARTITION ID {id}"), + Partition::Expr(expr) => write!(f, "PARTITION {expr}"), + Partition::Partitions(partitions) => { + write!(f, "PARTITION ({})", display_comma_separated(partitions)) + } + } + } +} + +/// DEDUPLICATE statement used in OPTIMIZE TABLE et al.
+/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum Deduplicate {
+    All,
+    ByExpression(Expr),
+}
+
+impl fmt::Display for Deduplicate {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Deduplicate::All => write!(f, "DEDUPLICATE"),
+            Deduplicate::ByExpression(expr) => write!(f, "DEDUPLICATE BY {expr}"),
+        }
     }
 }
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 70f96c5c5..6444556ef 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -33,7 +33,7 @@ pub use self::data_type::{
 pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
 pub use self::ddl::{
     AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption,
-    ColumnOptionDef, ConstraintCharacteristics, DeferrableInitial, GeneratedAs,
+    ColumnOptionDef, ConstraintCharacteristics, Deduplicate, DeferrableInitial, GeneratedAs,
     GeneratedExpressionMode, IndexOption, IndexType, KeyOrIndexDisplay, Owner, Partition,
     ProcedureParam, ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef,
     UserDefinedTypeRepresentation, ViewColumnDef,
@@ -2831,6 +2831,18 @@ pub enum Statement {
         to: Ident,
         with: Vec<SqlOption>,
     },
+    /// ```sql
+    /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+    /// ```
+    ///
+    /// See ClickHouse <https://clickhouse.com/docs/en/sql-reference/statements/optimize>
+    OptimizeTable {
+        name: ObjectName,
+        on_cluster: Option<Ident>,
+        partition: Option<Partition>,
+        include_final: bool,
+        deduplicate: Option<Deduplicate>,
+    },
 }

 impl fmt::Display for Statement {
@@ -4283,6 +4295,28 @@ impl fmt::Display for Statement {

                 Ok(())
             }
+            Statement::OptimizeTable {
+                name,
+                on_cluster,
+                partition,
+                include_final,
+                deduplicate,
+            } => {
+                write!(f, "OPTIMIZE TABLE {name}")?;
+                if let Some(on_cluster) = on_cluster {
+                    write!(f, " ON CLUSTER {on_cluster}", on_cluster = on_cluster)?;
+                }
+                if let Some(partition) = partition {
+                    write!(f, " {partition}", partition = partition)?;
+                }
+                if *include_final {
+                    write!(f, " FINAL")?;
+                }
+                if let Some(deduplicate) = deduplicate {
+                    write!(f, " {deduplicate}")?;
+                }
+                Ok(())
+            }
         }
     }
 }
diff --git a/src/keywords.rs b/src/keywords.rs
index ee2bd6173..49bd969af 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -218,6 +218,7 @@ define_keywords!(
     DECADE,
     DECIMAL,
     DECLARE,
+    DEDUPLICATE,
     DEFAULT,
     DEFAULT_DDL_COLLATION,
     DEFERRABLE,
@@ -301,6 +302,7 @@ define_keywords!(
     FILE_FORMAT,
     FILL,
     FILTER,
+    FINAL,
     FIRST,
     FIRST_VALUE,
     FIXEDSTRING,
@@ -354,6 +356,7 @@ define_keywords!(
     HOSTS,
     HOUR,
     HOURS,
+    ID,
     IDENTITY,
     IF,
     IGNORE,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 725e24bfb..67d58ea75 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -551,6 +551,10 @@ impl<'a> Parser<'a> {
                 Keyword::LOAD if dialect_of!(self is DuckDbDialect | GenericDialect) => {
                     Ok(self.parse_load()?)
                 }
+                // `OPTIMIZE` is clickhouse specific https://clickhouse.tech/docs/en/sql-reference/statements/optimize/
+                Keyword::OPTIMIZE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
+                    Ok(self.parse_optimize_table()?)
+                }
                 _ => self.expected("an SQL statement", next_token),
             },
             Token::LParen => {
@@ -6270,7 +6274,7 @@ impl<'a> Parser<'a> {
         self.expect_token(&Token::LParen)?;
         let partitions = self.parse_comma_separated(Parser::parse_expr)?;
         self.expect_token(&Token::RParen)?;
-        Ok(Partition { partitions })
+        Ok(Partition::Partitions(partitions))
     }

     pub fn parse_alter_table_operation(&mut self) -> Result<AlterTableOperation, ParserError> {
@@ -11165,6 +11169,45 @@ impl<'a> Parser<'a> {
         Ok(Statement::Load { extension_name })
     }

+    /// ```sql
+    /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+    /// ```
+    /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
+    pub fn parse_optimize_table(&mut self) -> Result<Statement, ParserError> {
+        self.expect_keyword(Keyword::TABLE)?;
+        let name = self.parse_object_name(false)?;
+        let on_cluster = self.parse_optional_on_cluster()?;
+
+        let partition = if self.parse_keyword(Keyword::PARTITION) {
+            if self.parse_keyword(Keyword::ID) {
+                Some(Partition::Identifier(self.parse_identifier(false)?))
+            } else {
+                Some(Partition::Expr(self.parse_expr()?))
+            }
+        } else {
+            None
+        };
+
+        let include_final = self.parse_keyword(Keyword::FINAL);
+        let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) {
+            if self.parse_keyword(Keyword::BY) {
+                Some(Deduplicate::ByExpression(self.parse_expr()?))
+            } else {
+                Some(Deduplicate::All)
+            }
+        } else {
+            None
+        };
+
+        Ok(Statement::OptimizeTable {
+            name,
+            on_cluster,
+            partition,
+            include_final,
+            deduplicate,
+        })
+    }
+
     /// ```sql
     /// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] <sequence_name>
     /// ```
diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs
index 6fdadc366..5263be29e 100644
--- a/tests/sqlparser_clickhouse.rs
+++ b/tests/sqlparser_clickhouse.rs
@@ -25,6 +25,7 @@ use sqlparser::ast::Value::Number;
 use sqlparser::ast::*;
 use sqlparser::dialect::ClickHouseDialect;
 use sqlparser::dialect::GenericDialect;
+use sqlparser::parser::ParserError::ParserError;

 #[test]
 fn parse_map_access_expr() {
@@ -221,6 +222,65 @@ fn parse_create_table() {
     );
 }

+#[test]
+fn parse_optimize_table() {
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE db.t0");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 ON CLUSTER 'cluster'");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 ON CLUSTER 'cluster' FINAL");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 FINAL DEDUPLICATE");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 DEDUPLICATE");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 DEDUPLICATE BY id");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 FINAL DEDUPLICATE BY id");
+    clickhouse_and_generic()
+        .verified_stmt("OPTIMIZE TABLE t0 PARTITION tuple('2023-04-22') DEDUPLICATE BY id");
+    match clickhouse_and_generic().verified_stmt(
+        "OPTIMIZE TABLE t0 ON CLUSTER cluster PARTITION ID '2024-07' FINAL DEDUPLICATE BY id",
+    ) {
+        Statement::OptimizeTable {
+            name,
+            on_cluster,
+            partition,
+            include_final,
+            deduplicate,
+            ..
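
// A hedged end-to-end sketch (my example, not part of the patch) of the new
// statement going through the crate's existing public API:
use sqlparser::dialect::ClickHouseDialect;
use sqlparser::parser::Parser;

let sql = "OPTIMIZE TABLE t0 ON CLUSTER cluster PARTITION ID '2024-07' FINAL DEDUPLICATE BY id";
let ast = Parser::parse_sql(&ClickHouseDialect {}, sql).unwrap();
// The Display impl added above serializes the statement back to the same text.
assert_eq!(ast[0].to_string(), sql);
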
+        } => {
+            assert_eq!(name.to_string(), "t0");
+            assert_eq!(on_cluster, Some(Ident::new("cluster")));
+            assert_eq!(
+                partition,
+                Some(Partition::Identifier(Ident::with_quote('\'', "2024-07")))
+            );
+            assert!(include_final);
+            assert_eq!(
+                deduplicate,
+                Some(Deduplicate::ByExpression(Identifier(Ident::new("id"))))
+            );
+        }
+        _ => unreachable!(),
+    }
+
+    // negative cases
+    assert_eq!(
+        clickhouse_and_generic()
+            .parse_sql_statements("OPTIMIZE TABLE t0 DEDUPLICATE BY")
+            .unwrap_err(),
+        ParserError("Expected: an expression:, found: EOF".to_string())
+    );
+    assert_eq!(
+        clickhouse_and_generic()
+            .parse_sql_statements("OPTIMIZE TABLE t0 PARTITION")
+            .unwrap_err(),
+        ParserError("Expected: an expression:, found: EOF".to_string())
+    );
+    assert_eq!(
+        clickhouse_and_generic()
+            .parse_sql_statements("OPTIMIZE TABLE t0 PARTITION ID")
+            .unwrap_err(),
+        ParserError("Expected: identifier, found: EOF".to_string())
+    );
+}
+
 fn column_def(name: Ident, data_type: DataType) -> ColumnDef {
     ColumnDef {
         name,

From d49acc67b13e1d68f2e6a25546161a68e165da4f Mon Sep 17 00:00:00 2001
From: Jesse
Date: Thu, 1 Aug 2024 23:28:15 +0200
Subject: [PATCH 19/57] Parse SETTINGS clause for ClickHouse table-valued functions (#1358)

---
 src/ast/mod.rs                |  4 +-
 src/ast/query.rs              | 25 ++++++++-
 src/parser/mod.rs             | 95 ++++++++++++++++++++++++-----------
 tests/sqlparser_clickhouse.rs | 77 ++++++++++++++++++++++++++++
 4 files changed, 167 insertions(+), 34 deletions(-)

diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 6444556ef..e0c929a9d 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -50,8 +50,8 @@ pub use self::query::{
     OffsetRows, OrderBy, OrderByExpr, PivotValueSource, Query, RenameSelectItem,
     RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select,
     SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table,
-    TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode,
-    Values, WildcardAdditionalOptions, With, WithFill,
+    TableAlias, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins, Top, TopQuantity,
+    ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill,
 };
 pub use self::value::{
     escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
diff --git a/src/ast/query.rs b/src/ast/query.rs
index b318f686a..cda7430be 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -899,6 +899,19 @@ impl fmt::Display for ExprWithAlias {
     }
 }

+/// Arguments to a table-valued function
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub struct TableFunctionArgs {
+    pub args: Vec<FunctionArg>,
+    /// ClickHouse-specific SETTINGS clause.
+    /// For example,
+    /// `SELECT * FROM executable('generate_random.py', TabSeparated, 'id UInt32, random String', SETTINGS send_chunk_header = false, pool_size = 16)`
+    /// [`executable` table function](https://clickhouse.com/docs/en/engines/table-functions/executable)
+    pub settings: Option<Vec<Setting>>,
+}
+
 /// A table name or a parenthesized subquery with an optional alias
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -916,7 +929,7 @@ pub enum TableFactor {
         /// This field's value is `Some(v)`, where `v` is a (possibly empty)
         /// vector of arguments, in the case of a table-valued function call,
         /// whereas it's `None` in the case of a regular table name.
-        args: Option<Vec<FunctionArg>>,
+        args: Option<TableFunctionArgs>,
         /// MSSQL-specific `WITH (...)` hints such as NOLOCK.
         with_hints: Vec<Expr>,
         /// Optional version qualifier to facilitate table time-travel, as
@@ -1314,7 +1327,15 @@ impl fmt::Display for TableFactor {
                     write!(f, "PARTITION ({})", display_comma_separated(partitions))?;
                 }
                 if let Some(args) = args {
-                    write!(f, "({})", display_comma_separated(args))?;
+                    write!(f, "(")?;
+                    write!(f, "{}", display_comma_separated(&args.args))?;
+                    if let Some(ref settings) = args.settings {
+                        if !args.args.is_empty() {
+                            write!(f, ", ")?;
+                        }
+                        write!(f, "SETTINGS {}", display_comma_separated(settings))?;
+                    }
+                    write!(f, ")")?;
                 }
                 if *with_ordinality {
                     write!(f, " WITH ORDINALITY")?;
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 67d58ea75..da9ca2672 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -3430,6 +3430,29 @@ impl<'a> Parser<'a> {
         Ok(values)
     }

+    /// Parse the comma of a comma-separated syntax element.
+    /// Returns true if the end of the list was reached (there is no next element).
+    fn is_parse_comma_separated_end(&mut self) -> bool {
+        if !self.consume_token(&Token::Comma) {
+            true
+        } else if self.options.trailing_commas {
+            let token = self.peek_token().token;
+            match token {
+                Token::Word(ref kw)
+                    if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) =>
+                {
+                    true
+                }
+                Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => {
+                    true
+                }
+                _ => false,
+            }
+        } else {
+            false
+        }
+    }
+
     /// Parse a comma-separated list of 1+ items accepted by `F`
     pub fn parse_comma_separated<T, F>(&mut self, mut f: F) -> Result<Vec<T>, ParserError>
     where
@@ -3438,22 +3461,8 @@ impl<'a> Parser<'a> {
         let mut values = vec![];
         loop {
             values.push(f(self)?);
-            if !self.consume_token(&Token::Comma) {
+            if self.is_parse_comma_separated_end() {
                 break;
-            } else if self.options.trailing_commas {
-                match self.peek_token().token {
-                    Token::Word(kw)
-                        if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) =>
-                    {
-                        break;
-                    }
-                    Token::RParen
-                    | Token::SemiColon
-                    | Token::EOF
-                    | Token::RBracket
-                    | Token::RBrace => break,
-                    _ => continue,
-                }
             }
         }
         Ok(values)
@@ -8104,19 +8113,7 @@ impl<'a> Parser<'a> {
             vec![]
         };

-        let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect)
-            && self.parse_keyword(Keyword::SETTINGS)
-        {
-            let key_values = self.parse_comma_separated(|p| {
-                let key = p.parse_identifier(false)?;
-                p.expect_token(&Token::Eq)?;
-                let value = p.parse_value()?;
-                Ok(Setting { key, value })
-            })?;
-            Some(key_values)
-        } else {
-            None
-        };
+        let settings = self.parse_settings()?;

         let fetch = if self.parse_keyword(Keyword::FETCH) {
             Some(self.parse_fetch()?)
@@ -8163,6 +8160,23 @@ impl<'a> Parser<'a> {
         }
     }

+    fn parse_settings(&mut self) -> Result<Option<Vec<Setting>>, ParserError> {
+        let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect)
+            && self.parse_keyword(Keyword::SETTINGS)
+        {
+            let key_values = self.parse_comma_separated(|p| {
+                let key = p.parse_identifier(false)?;
+                p.expect_token(&Token::Eq)?;
+                let value = p.parse_value()?;
+                Ok(Setting { key, value })
+            })?;
+            Some(key_values)
+        } else {
+            None
+        };
+        Ok(settings)
+    }
+
     /// Parse a mssql `FOR [XML | JSON | BROWSE]` clause
     pub fn parse_for_clause(&mut self) -> Result<Option<ForClause>, ParserError> {
         if self.parse_keyword(Keyword::XML) {
@@ -9382,9 +9396,9 @@ impl<'a> Parser<'a> {
             // Parse potential version qualifier
             let version = self.parse_table_version()?;

-            // Postgres, MSSQL: table-valued functions:
+            // Postgres, MSSQL, ClickHouse: table-valued functions:
             let args = if self.consume_token(&Token::LParen) {
-                Some(self.parse_optional_args()?)
+                Some(self.parse_table_function_args()?)
             } else {
                 None
             };
@@ -10327,6 +10341,27 @@ impl<'a> Parser<'a> {
         }
     }

+    fn parse_table_function_args(&mut self) -> Result<TableFunctionArgs, ParserError> {
+        if self.consume_token(&Token::RParen) {
+            return Ok(TableFunctionArgs {
+                args: vec![],
+                settings: None,
+            });
+        }
+        let mut args = vec![];
+        let settings = loop {
+            if let Some(settings) = self.parse_settings()? {
+                break Some(settings);
+            }
+            args.push(self.parse_function_args()?);
+            if self.is_parse_comma_separated_end() {
+                break None;
+            }
+        };
+        self.expect_token(&Token::RParen)?;
+        Ok(TableFunctionArgs { args, settings })
+    }
+
     /// Parses a potentially empty list of arguments to a window function
     /// (including the closing parenthesis).
     ///
diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs
index 5263be29e..4108958fb 100644
--- a/tests/sqlparser_clickhouse.rs
+++ b/tests/sqlparser_clickhouse.rs
@@ -1151,6 +1151,83 @@ fn parse_create_table_on_commit_and_as_query() {
     }
 }

+#[test]
+fn parse_select_table_function_settings() {
+    fn check_settings(sql: &str, expected: &TableFunctionArgs) {
+        match clickhouse_and_generic().verified_stmt(sql) {
+            Statement::Query(q) => {
+                let from = &q.body.as_select().unwrap().from;
+                assert_eq!(from.len(), 1);
+                assert_eq!(from[0].joins, vec![]);
+                match &from[0].relation {
+                    Table { args, ..
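
// A hedged usage sketch (mine, not from the patch): a ClickHouse table function
// call with a trailing SETTINGS list now lands in `TableFunctionArgs::settings`.
// The SQL string is an invented example.
use sqlparser::dialect::ClickHouseDialect;
use sqlparser::parser::Parser;

let sql = "SELECT * FROM numbers(10, SETTINGS max_threads = 1)";
let ast = Parser::parse_sql(&ClickHouseDialect {}, sql).unwrap();
assert_eq!(ast[0].to_string(), sql);
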
+                    } => {
+                        let args = args.as_ref().unwrap();
+                        assert_eq!(args, expected);
+                    }
+                    _ => unreachable!(),
+                }
+            }
+            _ => unreachable!(),
+        }
+    }
+    check_settings(
+        "SELECT * FROM table_function(arg, SETTINGS s0 = 3, s1 = 's')",
+        &TableFunctionArgs {
+            args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(
+                Expr::Identifier("arg".into()),
+            ))],
+
+            settings: Some(vec![
+                Setting {
+                    key: "s0".into(),
+                    value: Value::Number("3".parse().unwrap(), false),
+                },
+                Setting {
+                    key: "s1".into(),
+                    value: Value::SingleQuotedString("s".into()),
+                },
+            ]),
+        },
+    );
+    check_settings(
+        r#"SELECT * FROM table_function(arg)"#,
+        &TableFunctionArgs {
+            args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(
+                Expr::Identifier("arg".into()),
+            ))],
+            settings: None,
+        },
+    );
+    check_settings(
+        "SELECT * FROM table_function(SETTINGS s0 = 3, s1 = 's')",
+        &TableFunctionArgs {
+            args: vec![],
+            settings: Some(vec![
+                Setting {
+                    key: "s0".into(),
+                    value: Value::Number("3".parse().unwrap(), false),
+                },
+                Setting {
+                    key: "s1".into(),
+                    value: Value::SingleQuotedString("s".into()),
+                },
+            ]),
+        },
+    );
+    let invalid_cases = vec![
+        "SELECT * FROM t(SETTINGS a)",
+        "SELECT * FROM t(SETTINGS a=)",
+        "SELECT * FROM t(SETTINGS a=1, b)",
+        "SELECT * FROM t(SETTINGS a=1, b=)",
+        "SELECT * FROM t(SETTINGS a=1, b=c)",
+    ];
+    for sql in invalid_cases {
+        clickhouse_and_generic()
+            .parse_sql_statements(sql)
+            .expect_err("Expected: SETTINGS key = value, found: ");
+    }
+}
+
 fn clickhouse() -> TestedDialects {
     TestedDialects {
         dialects: vec![Box::new(ClickHouseDialect {})],

From 8f8c96f87ffe58945a0875c9c897f36c989b0095 Mon Sep 17 00:00:00 2001
From: Jax Liu
Date: Sun, 4 Aug 2024 19:47:32 +0800
Subject: [PATCH 20/57] Support parsing empty map literal syntax for DuckDB and Generic (#1361)

---
 src/parser/mod.rs         | 42 ++++++++++++++++-----------------------
 tests/sqlparser_common.rs |  2 ++
 2 files changed, 19 insertions(+), 25 deletions(-)

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index da9ca2672..fe8acb4f2 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1849,17 +1849,9 @@ impl<'a> Parser<'a> {
     /// Parses an array expression `[ex1, ex2, ..]`
     /// if `named` is `true`, came from an expression like `ARRAY[ex1, ex2]`
     pub fn parse_array_expr(&mut self, named: bool) -> Result<Expr, ParserError> {
-        if self.peek_token().token == Token::RBracket {
-            let _ = self.next_token(); // consume ]
-            Ok(Expr::Array(Array {
-                elem: vec![],
-                named,
-            }))
-        } else {
-            let exprs = self.parse_comma_separated(Parser::parse_expr)?;
-            self.expect_token(&Token::RBracket)?;
-            Ok(Expr::Array(Array { elem: exprs, named }))
-        }
+        let exprs = self.parse_comma_separated0(Parser::parse_expr, Token::RBracket)?;
+        self.expect_token(&Token::RBracket)?;
+        Ok(Expr::Array(Array { elem: exprs, named }))
     }

     pub fn parse_listagg_on_overflow(&mut self) -> Result<Option<ListAggOnOverflow>, ParserError> {
@@ -2352,11 +2344,8 @@ impl<'a> Parser<'a> {
     /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps
     fn parse_duckdb_map_literal(&mut self) -> Result<Expr, ParserError> {
         self.expect_token(&Token::LBrace)?;
-
-        let fields = self.parse_comma_separated(Self::parse_duckdb_map_field)?;
-
+        let fields = self.parse_comma_separated0(Self::parse_duckdb_map_field, Token::RBrace)?;
         self.expect_token(&Token::RBrace)?;
-
         Ok(Expr::Map(Map { entries: fields }))
     }

@@ -2937,7 +2926,7 @@ impl<'a> Parser<'a> {
             Expr::InList {
                 expr: Box::new(expr),
                 list: if self.dialect.supports_in_empty_list() {
-                    self.parse_comma_separated0(Parser::parse_expr)?
+                    self.parse_comma_separated0(Parser::parse_expr, Token::RParen)?
                 } else {
                     self.parse_comma_separated(Parser::parse_expr)?
                 },
@@ -3479,18 +3468,20 @@ impl<'a> Parser<'a> {
     }

     /// Parse a comma-separated list of 0+ items accepted by `F`
-    pub fn parse_comma_separated0<T, F>(&mut self, f: F) -> Result<Vec<T>, ParserError>
+    /// * `end_token` - expected end token for the closure (e.g. [Token::RParen], [Token::RBrace] ...)
+    pub fn parse_comma_separated0<T, F>(
+        &mut self,
+        f: F,
+        end_token: Token,
+    ) -> Result<Vec<T>, ParserError>
     where
         F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
     {
-        // ()
-        if matches!(self.peek_token().token, Token::RParen) {
+        if self.peek_token().token == end_token {
             return Ok(vec![]);
         }
-        // (,)
-        if self.options.trailing_commas
-            && matches!(self.peek_tokens(), [Token::Comma, Token::RParen])
-        {
+
+        if self.options.trailing_commas && self.peek_tokens() == [Token::Comma, end_token] {
             let _ = self.consume_token(&Token::Comma);
             return Ok(vec![]);
         }
@@ -4059,7 +4050,7 @@ impl<'a> Parser<'a> {
             })
         };
         self.expect_token(&Token::LParen)?;
-        let args = self.parse_comma_separated0(parse_function_param)?;
+        let args = self.parse_comma_separated0(parse_function_param, Token::RParen)?;
         self.expect_token(&Token::RParen)?;

         let return_type = if self.parse_keyword(Keyword::RETURNS) {
@@ -10713,7 +10704,8 @@ impl<'a> Parser<'a> {
         }

         if self.consume_token(&Token::LParen) {
-            let interpolations = self.parse_comma_separated0(|p| p.parse_interpolation())?;
+            let interpolations =
+                self.parse_comma_separated0(|p| p.parse_interpolation(), Token::RParen)?;
             self.expect_token(&Token::RParen)?;
             // INTERPOLATE () and INTERPOLATE ( ... ) variants
             return Ok(Some(Interpolate {
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 44e245254..7ec017269 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -10282,6 +10282,8 @@ fn test_map_syntax() {
             }),
         },
     );
+
+    check("MAP {}", Expr::Map(Map { entries: vec![] }));
 }

 #[test]

From a5480ae4982d84d37c6294b3e70ca24fb72d6a4d Mon Sep 17 00:00:00 2001
From: Samuel Colvin
Date: Tue, 6 Aug 2024 12:49:37 +0100
Subject: [PATCH 21/57] Support `Dialect` level precedence, update Postgres `Dialect` to match Postgres (#1360)

---
 src/ast/operator.rs         |   2 +-
 src/dialect/mod.rs          | 165 +++++++++++++++++++++++++++++++++++-
 src/dialect/postgresql.rs   | 134 +++++++++++++++++++++++++++++
 src/dialect/snowflake.rs    |   9 ++
 src/parser/mod.rs           | 148 +++++---------------------------
 tests/sqlparser_postgres.rs | 112 ++++++++++++++++++++++++
 6 files changed, 440 insertions(+), 130 deletions(-)

diff --git a/src/ast/operator.rs b/src/ast/operator.rs
index e70df344a..db6ed0564 100644
--- a/src/ast/operator.rs
+++ b/src/ast/operator.rs
@@ -151,7 +151,7 @@ pub enum BinaryOperator {
     Arrow,
     /// The `->>` operator.
     ///
-    /// On PostgreSQL, this operator that extracts a JSON object field or JSON
+    /// On PostgreSQL, this operator extracts a JSON object field or JSON
     /// array element and converts it to text, for example `'{"a":"b"}'::json
     /// ->> 'a'` or `[1, 2, 3]'::json ->> 2`.
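
// A quick check of the empty forms the `parse_comma_separated0` refactor above
// enables (an illustrative sketch, assuming the DuckDB dialect as in the
// map-literal tests):
use sqlparser::dialect::DuckDbDialect;
use sqlparser::parser::Parser;

let ast = Parser::parse_sql(&DuckDbDialect {}, "SELECT MAP {}").unwrap();
assert_eq!(ast[0].to_string(), "SELECT MAP {}");
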
     ///
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index 22e0baeb2..fc45545d4 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -24,12 +24,13 @@ mod redshift;
 mod snowflake;
 mod sqlite;

-use crate::ast::{Expr, Statement};
 use core::any::{Any, TypeId};
 use core::fmt::Debug;
 use core::iter::Peekable;
 use core::str::Chars;

+use log::debug;
+
 pub use self::ansi::AnsiDialect;
 pub use self::bigquery::BigQueryDialect;
 pub use self::clickhouse::ClickHouseDialect;
@@ -43,8 +44,11 @@ pub use self::postgresql::PostgreSqlDialect;
 pub use self::redshift::RedshiftSqlDialect;
 pub use self::snowflake::SnowflakeDialect;
 pub use self::sqlite::SQLiteDialect;
+use crate::ast::{Expr, Statement};
 pub use crate::keywords;
+use crate::keywords::Keyword;
 use crate::parser::{Parser, ParserError};
+use crate::tokenizer::Token;

 #[cfg(not(feature = "std"))]
 use alloc::boxed::Box;
@@ -300,13 +304,172 @@ pub trait Dialect: Debug + Any {
         // return None to fall back to the default behavior
         None
     }
+
+    /// Get the precedence of the next token. This "full" method means all precedence logic and remain
+    /// in the dialect. while still allowing overriding the `get_next_precedence` method with the option to
+    /// fallback to the default behavior.
+    ///
+    /// Higher number => higher precedence
+    fn get_next_precedence_full(&self, parser: &Parser) -> Result<u8, ParserError> {
+        if let Some(precedence) = self.get_next_precedence(parser) {
+            return precedence;
+        }
+
+        let token = parser.peek_token();
+        debug!("get_next_precedence() {:?}", token);
+        match token.token {
+            Token::Word(w) if w.keyword == Keyword::OR => Ok(OR_PREC),
+            Token::Word(w) if w.keyword == Keyword::AND => Ok(AND_PREC),
+            Token::Word(w) if w.keyword == Keyword::XOR => Ok(XOR_PREC),
+
+            Token::Word(w) if w.keyword == Keyword::AT => {
+                match (
+                    parser.peek_nth_token(1).token,
+                    parser.peek_nth_token(2).token,
+                ) {
+                    (Token::Word(w), Token::Word(w2))
+                        if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
+                    {
+                        Ok(AT_TZ_PREC)
+                    }
+                    _ => Ok(UNKNOWN_PREC),
+                }
+            }
+
+            Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
+                // The precedence of NOT varies depending on keyword that
+                // follows it. If it is followed by IN, BETWEEN, or LIKE,
+                // it takes on the precedence of those tokens. Otherwise, it
+                // is not an infix operator, and therefore has zero
+                // precedence.
+                Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
+                Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
+                Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
+                Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
+                Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
+                Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
+                Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
+                _ => Ok(UNKNOWN_PREC),
+            },
+            Token::Word(w) if w.keyword == Keyword::IS => Ok(IS_PREC),
+            Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
+            Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
+            Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
+            Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
+            Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
+            Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
+            Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
+            Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(BETWEEN_PREC),
+            Token::Word(w) if w.keyword == Keyword::DIV => Ok(MUL_DIV_MOD_OP_PREC),
+            Token::Eq
+            | Token::Lt
+            | Token::LtEq
+            | Token::Neq
+            | Token::Gt
+            | Token::GtEq
+            | Token::DoubleEq
+            | Token::Tilde
+            | Token::TildeAsterisk
+            | Token::ExclamationMarkTilde
+            | Token::ExclamationMarkTildeAsterisk
+            | Token::DoubleTilde
+            | Token::DoubleTildeAsterisk
+            | Token::ExclamationMarkDoubleTilde
+            | Token::ExclamationMarkDoubleTildeAsterisk
+            | Token::Spaceship => Ok(EQ_PREC),
+            Token::Pipe => Ok(PIPE_PREC),
+            Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(CARET_PREC),
+            Token::Ampersand => Ok(AMPERSAND_PREC),
+            Token::Plus | Token::Minus => Ok(PLUS_MINUS_PREC),
+            Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
+                Ok(MUL_DIV_MOD_OP_PREC)
+            }
+            Token::DoubleColon
+            | Token::ExclamationMark
+            | Token::LBracket
+            | Token::Overlap
+            | Token::CaretAt => Ok(DOUBLE_COLON_PREC),
+            // Token::Colon if (self as dyn Dialect).is::<SnowflakeDialect>() => Ok(DOUBLE_COLON_PREC),
+            Token::Arrow
+            | Token::LongArrow
+            | Token::HashArrow
+            | Token::HashLongArrow
+            | Token::AtArrow
+            | Token::ArrowAt
+            | Token::HashMinus
+            | Token::AtQuestion
+            | Token::AtAt
+            | Token::Question
+            | Token::QuestionAnd
+            | Token::QuestionPipe
+            | Token::CustomBinaryOperator(_) => Ok(PG_OTHER_PREC),
+            _ => Ok(UNKNOWN_PREC),
+        }
+    }
+
     /// Dialect-specific statement parser override
     fn parse_statement(&self, _parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
         // return None to fall back to the default behavior
         None
     }
+
+    /// The following precedence values are used directly by `Parse` or in dialects,
+    /// so have to be made public by the dialect.
+    fn prec_double_colon(&self) -> u8 {
+        DOUBLE_COLON_PREC
+    }
+
+    fn prec_mul_div_mod_op(&self) -> u8 {
+        MUL_DIV_MOD_OP_PREC
+    }
+
+    fn prec_plus_minus(&self) -> u8 {
+        PLUS_MINUS_PREC
+    }
+
+    fn prec_between(&self) -> u8 {
+        BETWEEN_PREC
+    }
+
+    fn prec_like(&self) -> u8 {
+        LIKE_PREC
+    }
+
+    fn prec_unary_not(&self) -> u8 {
+        UNARY_NOT_PREC
+    }
+
+    fn prec_unknown(&self) -> u8 {
+        UNKNOWN_PREC
+    }
 }

+// Define the lexical Precedence of operators.
+//
+// Uses <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> (APPROXIMATELY) as a reference
+// higher number = higher precedence
+//
+// NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator
+// actually has higher precedence than addition.
+// See <https://postgrespro.com/list/thread-id/2673331>.
+const DOUBLE_COLON_PREC: u8 = 50;
+const AT_TZ_PREC: u8 = 41;
+const MUL_DIV_MOD_OP_PREC: u8 = 40;
+const PLUS_MINUS_PREC: u8 = 30;
+const XOR_PREC: u8 = 24;
+const AMPERSAND_PREC: u8 = 23;
+const CARET_PREC: u8 = 22;
+const PIPE_PREC: u8 = 21;
+const BETWEEN_PREC: u8 = 20;
+const EQ_PREC: u8 = 20;
+const LIKE_PREC: u8 = 19;
+const IS_PREC: u8 = 17;
+const PG_OTHER_PREC: u8 = 16;
+const UNARY_NOT_PREC: u8 = 15;
+const AND_PREC: u8 = 10;
+const OR_PREC: u8 = 5;
+const UNKNOWN_PREC: u8 = 0;
+
 impl dyn Dialect {
     #[inline]
     pub fn is<T: Dialect>(&self) -> bool {
diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs
index 8254e807b..293fb9e7d 100644
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@@ -9,6 +9,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+use log::debug;

 use crate::ast::{CommentObject, Statement};
 use crate::dialect::Dialect;
@@ -20,6 +21,23 @@ use crate::tokenizer::Token;
 #[derive(Debug)]
 pub struct PostgreSqlDialect {}

+const DOUBLE_COLON_PREC: u8 = 140;
+const BRACKET_PREC: u8 = 130;
+const COLLATE_PREC: u8 = 120;
+const AT_TZ_PREC: u8 = 110;
+const CARET_PREC: u8 = 100;
+const MUL_DIV_MOD_OP_PREC: u8 = 90;
+const PLUS_MINUS_PREC: u8 = 80;
+// there's no XOR operator in PostgreSQL, but support it here to avoid breaking tests
+const XOR_PREC: u8 = 75;
+const PG_OTHER_PREC: u8 = 70;
+const BETWEEN_LIKE_PREC: u8 = 60;
+const EQ_PREC: u8 = 50;
+const IS_PREC: u8 = 40;
+const NOT_PREC: u8 = 30;
+const AND_PREC: u8 = 20;
+const OR_PREC: u8 = 10;
+
 impl Dialect for PostgreSqlDialect {
     fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
         Some('"')
@@ -67,6 +85,102 @@ impl Dialect for PostgreSqlDialect {
         )
     }

+    fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
+        let token = parser.peek_token();
+        debug!("get_next_precedence() {:?}", token);
+
+        let precedence = match token.token {
+            Token::Word(w) if w.keyword == Keyword::OR => OR_PREC,
+            Token::Word(w) if w.keyword == Keyword::XOR => XOR_PREC,
+            Token::Word(w) if w.keyword == Keyword::AND => AND_PREC,
+            Token::Word(w) if w.keyword == Keyword::AT => {
+                match (
+                    parser.peek_nth_token(1).token,
+                    parser.peek_nth_token(2).token,
+                ) {
+                    (Token::Word(w), Token::Word(w2))
+                        if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
+                    {
+                        AT_TZ_PREC
+                    }
+                    _ => self.prec_unknown(),
+                }
+            }
+
+            Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
+                // The precedence of NOT varies depending on keyword that
+                // follows it. If it is followed by IN, BETWEEN, or LIKE,
+                // it takes on the precedence of those tokens. Otherwise, it
+                // is not an infix operator, and therefore has zero
+                // precedence.
+                Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
+                Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
+                Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
+                Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
+                Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
+                Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
+                Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
+                _ => self.prec_unknown(),
+            },
+            Token::Word(w) if w.keyword == Keyword::IS => IS_PREC,
+            Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::OPERATOR => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::DIV => MUL_DIV_MOD_OP_PREC,
+            Token::Word(w) if w.keyword == Keyword::COLLATE => COLLATE_PREC,
+            Token::Eq
+            | Token::Lt
+            | Token::LtEq
+            | Token::Neq
+            | Token::Gt
+            | Token::GtEq
+            | Token::DoubleEq
+            | Token::Tilde
+            | Token::TildeAsterisk
+            | Token::ExclamationMarkTilde
+            | Token::ExclamationMarkTildeAsterisk
+            | Token::DoubleTilde
+            | Token::DoubleTildeAsterisk
+            | Token::ExclamationMarkDoubleTilde
+            | Token::ExclamationMarkDoubleTildeAsterisk
+            | Token::Spaceship => EQ_PREC,
+            Token::Caret => CARET_PREC,
+            Token::Plus | Token::Minus => PLUS_MINUS_PREC,
+            Token::Mul | Token::Div | Token::Mod => MUL_DIV_MOD_OP_PREC,
+            Token::DoubleColon => DOUBLE_COLON_PREC,
+            Token::LBracket => BRACKET_PREC,
+            Token::Arrow
+            | Token::LongArrow
+            | Token::HashArrow
+            | Token::HashLongArrow
+            | Token::AtArrow
+            | Token::ArrowAt
+            | Token::HashMinus
+            | Token::AtQuestion
+            | Token::AtAt
+            | Token::Question
+            | Token::QuestionAnd
+            | Token::QuestionPipe
+            | Token::ExclamationMark
+            | Token::Overlap
+            | Token::CaretAt
+            | Token::StringConcat
+            | Token::Sharp
+            | Token::ShiftRight
+            | Token::ShiftLeft
+            | Token::Pipe
+            | Token::Ampersand
+            | Token::CustomBinaryOperator(_) => PG_OTHER_PREC,
+            _ => self.prec_unknown(),
+        };
+        Some(Ok(precedence))
+    }
+
     fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
         if parser.parse_keyword(Keyword::COMMENT) {
             Some(parse_comment(parser))
@@ -82,6 +196,26 @@ impl Dialect for PostgreSqlDialect {
     fn supports_group_by_expr(&self) -> bool {
         true
     }
+
+    fn prec_mul_div_mod_op(&self) -> u8 {
+        MUL_DIV_MOD_OP_PREC
+    }
+
+    fn prec_plus_minus(&self) -> u8 {
+        PLUS_MINUS_PREC
+    }
+
+    fn prec_between(&self) -> u8 {
+        BETWEEN_LIKE_PREC
+    }
+
+    fn prec_like(&self) -> u8 {
+        BETWEEN_LIKE_PREC
+    }
+
+    fn prec_unary_not(&self) -> u8 {
+        NOT_PREC
+    }
 }

 pub fn parse_comment(parser: &mut Parser) -> Result<Statement, ParserError> {
diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs
index 9f1d7f27b..fe35d8da3 100644
--- a/src/dialect/snowflake.rs
+++ b/src/dialect/snowflake.rs
@@ -145,6 +145,15 @@ impl Dialect for SnowflakeDialect {

         None
     }
+
+    fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
+        let token = parser.peek_token();
+        // Snowflake supports the `:` cast operator unlike other dialects
+        match token.token {
+            Token::Colon => Some(Ok(self.prec_double_colon())),
+            _ => None,
+        }
+    }
 }

 /// Parse snowflake create table statement.
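
// What these dialect-specific levels mean in practice (an illustrative sketch of
// mine, not part of the patch): under PostgreSQL, `->` now binds tighter than `=`,
// matching the arrow-precedence tests added later in this patch.
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

let sql = "SELECT foo -> 'bar' = 'spam'";
let ast = Parser::parse_sql(&PostgreSqlDialect {}, sql).unwrap();
// Parsed as `(foo -> 'bar') = 'spam'`.
assert_eq!(ast[0].to_string(), sql);
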
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index fe8acb4f2..1fdba5ecf 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -875,7 +875,7 @@ impl<'a> Parser<'a> {
     /// Parse a new expression.
     pub fn parse_expr(&mut self) -> Result<Expr, ParserError> {
         let _guard = self.recursion_counter.try_decrease()?;
-        self.parse_subexpr(0)
+        self.parse_subexpr(self.dialect.prec_unknown())
     }

     /// Parse tokens until the precedence changes.
@@ -897,7 +897,7 @@ impl<'a> Parser<'a> {
     }

     pub fn parse_interval_expr(&mut self) -> Result<Expr, ParserError> {
-        let precedence = 0;
+        let precedence = self.dialect.prec_unknown();
         let mut expr = self.parse_prefix()?;

         loop {
@@ -918,9 +918,9 @@ impl<'a> Parser<'a> {
         let token = self.peek_token();

         match token.token {
-            Token::Word(w) if w.keyword == Keyword::AND => Ok(0),
-            Token::Word(w) if w.keyword == Keyword::OR => Ok(0),
-            Token::Word(w) if w.keyword == Keyword::XOR => Ok(0),
+            Token::Word(w) if w.keyword == Keyword::AND => Ok(self.dialect.prec_unknown()),
+            Token::Word(w) if w.keyword == Keyword::OR => Ok(self.dialect.prec_unknown()),
+            Token::Word(w) if w.keyword == Keyword::XOR => Ok(self.dialect.prec_unknown()),
             _ => self.get_next_precedence(),
         }
     }
@@ -1079,7 +1079,7 @@ impl<'a> Parser<'a> {
                     self.parse_bigquery_struct_literal()
                 }
                 Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => {
-                    let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?;
+                    let expr = self.parse_subexpr(self.dialect.prec_plus_minus())?;
                     Ok(Expr::Prior(Box::new(expr)))
                 }
                 Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => {
@@ -1167,7 +1167,7 @@ impl<'a> Parser<'a> {
                 };
                 Ok(Expr::UnaryOp {
                     op,
-                    expr: Box::new(self.parse_subexpr(Self::MUL_DIV_MOD_OP_PREC)?),
+                    expr: Box::new(self.parse_subexpr(self.dialect.prec_mul_div_mod_op())?),
                 })
             }
             tok @ Token::DoubleExclamationMark
@@ -1187,7 +1187,7 @@ impl<'a> Parser<'a> {
                 };
                 Ok(Expr::UnaryOp {
                     op,
-                    expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
+                    expr: Box::new(self.parse_subexpr(self.dialect.prec_plus_minus())?),
                 })
             }
             Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
@@ -1716,12 +1716,13 @@ impl<'a> Parser<'a> {
     }

     pub fn parse_position_expr(&mut self, ident: Ident) -> Result<Expr, ParserError> {
+        let between_prec = self.dialect.prec_between();
         let position_expr = self.maybe_parse(|p| {
             // PARSE SELECT POSITION('@' in field)
             p.expect_token(&Token::LParen)?;

             // Parse the subexpr till the IN keyword
-            let expr = p.parse_subexpr(Self::BETWEEN_PREC)?;
+            let expr = p.parse_subexpr(between_prec)?;
             p.expect_keyword(Keyword::IN)?;
             let from = p.parse_expr()?;
             p.expect_token(&Token::RParen)?;
@@ -1963,12 +1964,12 @@ impl<'a> Parser<'a> {
                 }
                 _ => Ok(Expr::UnaryOp {
                     op: UnaryOperator::Not,
-                    expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
+                    expr: Box::new(self.parse_subexpr(self.dialect.prec_unary_not())?),
                 }),
             },
             _ => Ok(Expr::UnaryOp {
                 op: UnaryOperator::Not,
-                expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
+                expr: Box::new(self.parse_subexpr(self.dialect.prec_unary_not())?),
             }),
         }
     }
@@ -2641,7 +2642,7 @@ impl<'a> Parser<'a> {
             Ok(Expr::RLike {
                 negated,
                 expr: Box::new(expr),
-                pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
+                pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
                 regexp,
             })
         } else if self.parse_keyword(Keyword::IN) {
@@ -2652,21 +2653,21 @@ impl<'a> Parser<'a> {
             Ok(Expr::Like {
                 negated,
                 expr: Box::new(expr),
-                pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
+                pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
                 escape_char: self.parse_escape_char()?,
             })
         } else if self.parse_keyword(Keyword::ILIKE) {
             Ok(Expr::ILike {
                 negated,
                 expr: Box::new(expr),
-                pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
+                pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
                 escape_char: self.parse_escape_char()?,
             })
         } else if self.parse_keywords(&[Keyword::SIMILAR, Keyword::TO]) {
             Ok(Expr::SimilarTo {
                 negated,
                 expr: Box::new(expr),
-                pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
+                pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
                 escape_char: self.parse_escape_char()?,
             })
         } else {
@@ -2941,9 +2942,9 @@ impl<'a> Parser<'a> {
     pub fn parse_between(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParserError> {
         // Stop parsing subexpressions for <low> and <high> on tokens with
        // precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc.
-        let low = self.parse_subexpr(Self::BETWEEN_PREC)?;
+        let low = self.parse_subexpr(self.dialect.prec_between())?;
         self.expect_keyword(Keyword::AND)?;
-        let high = self.parse_subexpr(Self::BETWEEN_PREC)?;
+        let high = self.parse_subexpr(self.dialect.prec_between())?;
         Ok(Expr::Between {
             expr: Box::new(expr),
             negated,
@@ -2962,118 +2963,9 @@ impl<'a> Parser<'a> {
         })
     }

-    // Use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference
-    // higher number = higher precedence
-    //
-    // NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator
-    // actually has higher precedence than addition.
-    // See https://postgrespro.com/list/thread-id/2673331.
-    const AT_TZ_PREC: u8 = 41;
-    const MUL_DIV_MOD_OP_PREC: u8 = 40;
-    const PLUS_MINUS_PREC: u8 = 30;
-    const XOR_PREC: u8 = 24;
-    const BETWEEN_PREC: u8 = 20;
-    const LIKE_PREC: u8 = 19;
-    const IS_PREC: u8 = 17;
-    const PG_OTHER_PREC: u8 = 16;
-    const UNARY_NOT_PREC: u8 = 15;
-    const AND_PREC: u8 = 10;
-    const OR_PREC: u8 = 5;
-
     /// Get the precedence of the next token
     pub fn get_next_precedence(&self) -> Result<u8, ParserError> {
-        // allow the dialect to override precedence logic
-        if let Some(precedence) = self.dialect.get_next_precedence(self) {
-            return precedence;
-        }
-
-        let token = self.peek_token();
-        debug!("get_next_precedence() {:?}", token);
-        let [token_0, token_1, token_2] = self.peek_tokens_with_location();
-        debug!("0: {token_0} 1: {token_1} 2: {token_2}");
-        match token.token {
-            Token::Word(w) if w.keyword == Keyword::OR => Ok(Self::OR_PREC),
-            Token::Word(w) if w.keyword == Keyword::AND => Ok(Self::AND_PREC),
-            Token::Word(w) if w.keyword == Keyword::XOR => Ok(Self::XOR_PREC),
-
-            Token::Word(w) if w.keyword == Keyword::AT => {
-                match (self.peek_nth_token(1).token, self.peek_nth_token(2).token) {
-                    (Token::Word(w), Token::Word(w2))
-                        if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
-                    {
-                        Ok(Self::AT_TZ_PREC)
-                    }
-                    _ => Ok(0),
-                }
-            }
-
-            Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1).token {
-                // The precedence of NOT varies depending on keyword that
-                // follows it. If it is followed by IN, BETWEEN, or LIKE,
-                // it takes on the precedence of those tokens. Otherwise, it
-                // is not an infix operator, and therefore has zero
-                // precedence.
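
// For reference, a hedged sketch (my example, not part of the patch) of a custom
// dialect hooking this mechanism the same way the Snowflake override above treats
// `:` as a high-precedence operator. `MyDialect` is an invented name.
use sqlparser::dialect::Dialect;
use sqlparser::parser::{Parser, ParserError};
use sqlparser::tokenizer::Token;

#[derive(Debug)]
struct MyDialect;

impl Dialect for MyDialect {
    fn is_identifier_start(&self, ch: char) -> bool {
        ch.is_ascii_alphabetic()
    }
    fn is_identifier_part(&self, ch: char) -> bool {
        ch.is_ascii_alphanumeric()
    }
    // Give `^` multiplication-level precedence instead of the default CARET_PREC.
    fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
        match parser.peek_token().token {
            Token::Caret => Some(Ok(self.prec_mul_div_mod_op())),
            _ => None, // fall back to get_next_precedence_full
        }
    }
}
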
- Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), - _ => Ok(0), - }, - Token::Word(w) if w.keyword == Keyword::IS => Ok(Self::IS_PREC), - Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC), - Token::Eq - | Token::Lt - | Token::LtEq - | Token::Neq - | Token::Gt - | Token::GtEq - | Token::DoubleEq - | Token::Tilde - | Token::TildeAsterisk - | Token::ExclamationMarkTilde - | Token::ExclamationMarkTildeAsterisk - | Token::DoubleTilde - | Token::DoubleTildeAsterisk - | Token::ExclamationMarkDoubleTilde - | Token::ExclamationMarkDoubleTildeAsterisk - | Token::Spaceship => Ok(20), - Token::Pipe => Ok(21), - Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22), - Token::Ampersand => Ok(23), - Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC), - Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => { - Ok(Self::MUL_DIV_MOD_OP_PREC) - } - Token::DoubleColon => Ok(50), - Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50), - Token::ExclamationMark => Ok(50), - Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50), - Token::Arrow - | Token::LongArrow - | Token::HashArrow - | Token::HashLongArrow - | Token::AtArrow - | Token::ArrowAt - | Token::HashMinus - | Token::AtQuestion - | Token::AtAt - | Token::Question - | Token::QuestionAnd - | Token::QuestionPipe - | Token::CustomBinaryOperator(_) => Ok(Self::PG_OTHER_PREC), - _ => Ok(0), - } + self.dialect.get_next_precedence_full(self) } /// Return the first non-whitespace token that has not yet been processed @@ -8051,7 +7943,7 @@ impl<'a> Parser<'a> { format_clause: None, }) } else { - let body = self.parse_boxed_query_body(0)?; + let body = self.parse_boxed_query_body(self.dialect.prec_unknown())?; let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 7406bdd74..150f06913 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4476,3 +4476,115 @@ fn test_unicode_string_literal() { } } } + +fn check_arrow_precedence(sql: &str, arrow_operator: BinaryOperator) { + assert_eq!( + pg().verified_stmt(sql), + Statement::Query(Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::BinaryOp { + left: 
Box::new(Expr::Identifier(Ident { + value: "foo".to_string(), + quote_style: None, + })), + op: arrow_operator, + right: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))), + }), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::SingleQuotedString("spam".to_string()))), + })], + into: None, + from: vec![], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: None, + }))), + order_by: None, + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + })) + ) +} + +#[test] +fn arrow_precedence() { + check_arrow_precedence("SELECT foo -> 'bar' = 'spam'", BinaryOperator::Arrow); +} + +#[test] +fn long_arrow_precedence() { + check_arrow_precedence("SELECT foo ->> 'bar' = 'spam'", BinaryOperator::LongArrow); +} + +#[test] +fn arrow_cast_precedence() { + // check this matches postgres where you would need `(foo -> 'bar')::TEXT` + let stmt = pg().verified_stmt("SELECT foo -> 'bar'::TEXT"); + assert_eq!( + stmt, + Statement::Query(Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "foo".to_string(), + quote_style: None, + })), + op: BinaryOperator::Arrow, + right: Box::new(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))), + data_type: DataType::Text, + format: None, + }), + })], + into: None, + from: vec![], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: None, + }))), + order_by: None, + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + })) + ) +} From da484c57c4a5682da24c070d76c872148e54bbfe Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 6 Aug 2024 08:23:07 -0400 Subject: [PATCH 22/57] Improve comments on `Dialect` (#1366) --- src/dialect/mod.rs | 90 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 80 insertions(+), 10 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index fc45545d4..9033ecc78 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -66,7 +66,8 @@ macro_rules! dialect_of { /// Encapsulates the differences between SQL implementations. /// /// # SQL Dialects -/// SQL implementations deviatiate from one another, either due to +/// +/// SQL implementations deviate from one another, either due to /// custom extensions or various historical reasons. This trait /// encapsulates the parsing differences between dialects. /// @@ -114,16 +115,20 @@ pub trait Dialect: Debug + Any { fn is_delimited_identifier_start(&self, ch: char) -> bool { ch == '"' || ch == '`' } + /// Return the character used to quote identifiers. 
     fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
         None
     }
+
     /// Determine if quoted characters are proper for identifier
     fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable<Chars<'_>>) -> bool {
         true
     }
+
     /// Determine if a character is a valid start character for an unquoted identifier
     fn is_identifier_start(&self, ch: char) -> bool;
+
     /// Determine if a character is a valid unquoted identifier character
     fn is_identifier_part(&self, ch: char) -> bool;

@@ -168,6 +173,7 @@ pub trait Dialect: Debug + Any {
     fn supports_filter_during_aggregation(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports referencing another named window
     /// within a window clause declaration.
     ///
@@ -179,6 +185,7 @@ pub trait Dialect: Debug + Any {
     fn supports_window_clause_named_window_reference(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports `ARRAY_AGG() [WITHIN GROUP (ORDER BY)]` expressions.
     /// Otherwise, the dialect should expect an `ORDER BY` without the `WITHIN GROUP` clause, e.g. [`ANSI`]
     ///
@@ -186,38 +193,47 @@ pub trait Dialect: Debug + Any {
     fn supports_within_after_array_aggregation(&self) -> bool {
         false
     }
+
     /// Returns true if the dialects supports `group sets, roll up, or cube` expressions.
     fn supports_group_by_expr(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports CONNECT BY.
     fn supports_connect_by(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports the MATCH_RECOGNIZE operation.
     fn supports_match_recognize(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports `(NOT) IN ()` expressions
     fn supports_in_empty_list(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports `BEGIN {DEFERRED | IMMEDIATE | EXCLUSIVE} [TRANSACTION]` statements
     fn supports_start_transaction_modifier(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports named arguments of the form FUN(a = '1', b = '2').
     fn supports_named_fn_args_with_eq_operator(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports identifiers starting with a numeric
-    /// prefix such as tables named: `59901_user_login`
+    /// prefix such as tables named `59901_user_login`
     fn supports_numeric_prefix(&self) -> bool {
         false
     }
+
     /// Returns true if the dialects supports specifying null treatment
-    /// as part of a window function's parameter list. As opposed
+    /// as part of a window function's parameter list as opposed
     /// to after the parameter list.
+    ///
     /// i.e The following syntax returns true
     /// ```sql
     /// FIRST_VALUE(a IGNORE NULLS) OVER ()
     /// ```
@@ -229,16 +245,19 @@ pub trait Dialect: Debug + Any {
     fn supports_window_function_null_treatment_arg(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports defining structs or objects using a
     /// syntax like `{'x': 1, 'y': 2, 'z': 3}`.
     fn supports_dictionary_syntax(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports defining object using the
     /// syntax like `Map {1: 10, 2: 20}`.
     fn support_map_literal_syntax(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports lambda functions, for example:
     ///
     /// ```sql
@@ -247,6 +266,7 @@ pub trait Dialect: Debug + Any {
     fn supports_lambda_functions(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports multiple variable assignment
     /// using parentheses in a `SET` variable declaration.
     ///
     /// ```sql
     /// SET (variable[, ...]) = (expression[, ...]);
     /// ```
     fn supports_parenthesized_set_variables(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports an `EXCEPT` clause following a
     /// wildcard in a select list.
     ///
@@ -266,30 +287,40 @@ pub trait Dialect: Debug + Any {
     fn supports_select_wildcard_except(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect has a CONVERT function which accepts a type first
     /// and an expression second, e.g. `CONVERT(varchar, 1)`
     fn convert_type_before_value(&self) -> bool {
         false
     }
+
     /// Returns true if the dialect supports triple quoted string
     /// e.g. `"""abc"""`
     fn supports_triple_quoted_string(&self) -> bool {
         false
     }
+
     /// Dialect-specific prefix parser override
     fn parse_prefix(&self, _parser: &mut Parser) -> Option<Result<Expr, ParserError>> {
         // return None to fall back to the default behavior
         None
     }
+
     /// Does the dialect support trailing commas around the query?
     fn supports_trailing_commas(&self) -> bool {
         false
     }
+
     /// Does the dialect support trailing commas in the projection list?
     fn supports_projection_trailing_commas(&self) -> bool {
         self.supports_trailing_commas()
     }
+
     /// Dialect-specific infix parser override
+    ///
+    /// This method is called to parse the next infix expression.
+    ///
+    /// If `None` is returned, falls back to the default behavior.
     fn parse_infix(
         &self,
         _parser: &mut Parser,
@@ -299,24 +330,33 @@ pub trait Dialect: Debug + Any {
         // return None to fall back to the default behavior
         None
     }
+
     /// Dialect-specific precedence override
+    ///
+    /// This method is called to get the precedence of the next token.
+    ///
+    /// If `None` is returned, falls back to the default behavior.
     fn get_next_precedence(&self, _parser: &Parser) -> Option<Result<u8, ParserError>> {
         // return None to fall back to the default behavior
         None
     }

-    /// Get the precedence of the next token. This "full" method means all precedence logic and remain
-    /// in the dialect. while still allowing overriding the `get_next_precedence` method with the option to
-    /// fallback to the default behavior.
+    /// Get the precedence of the next token, looking at the full token stream.
     ///
-    /// Higher number => higher precedence
+    /// A higher number => higher precedence
+    ///
+    /// See [`Self::get_next_precedence`] to override the behavior for just the
+    /// next token.
+    ///
+    /// The default implementation is used for many dialects, but can be
+    /// overridden to provide dialect-specific behavior.
     fn get_next_precedence_full(&self, parser: &Parser) -> Result<u8, ParserError> {
         if let Some(precedence) = self.get_next_precedence(parser) {
             return precedence;
         }

         let token = parser.peek_token();
-        debug!("get_next_precedence() {:?}", token);
+        debug!("get_next_precedence_full() {:?}", token);
         match token.token {
             Token::Word(w) if w.keyword == Keyword::OR => Ok(OR_PREC),
             Token::Word(w) if w.keyword == Keyword::AND => Ok(AND_PREC),
             Token::Word(w) if w.keyword == Keyword::XOR => Ok(XOR_PREC),
@@ -408,37 +448,67 @@ pub trait Dialect: Debug + Any {
     }

     /// Dialect-specific statement parser override
+    ///
+    /// This method is called to parse the next statement.
+    ///
+    /// If `None` is returned, falls back to the default behavior.
     fn parse_statement(&self, _parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
         // return None to fall back to the default behavior
         None
     }

-    /// The following precedence values are used directly by `Parse` or in dialects,
-    /// so have to be made public by the dialect.
+    // The following precedence values are used directly by `Parse` or in dialects,
+    // so have to be made public by the dialect.
+
+    /// Return the precedence of the `::` operator.
+    ///
+    /// Default is 50.
     fn prec_double_colon(&self) -> u8 {
         DOUBLE_COLON_PREC
     }

+    /// Return the precedence of `*`, `/`, and `%` operators.
+    ///
+    /// Default is 40.
     fn prec_mul_div_mod_op(&self) -> u8 {
         MUL_DIV_MOD_OP_PREC
     }

+    /// Return the precedence of the `+` and `-` operators.
+    ///
+    /// Default is 30.
     fn prec_plus_minus(&self) -> u8 {
         PLUS_MINUS_PREC
     }

+    /// Return the precedence of the `BETWEEN` operator.
+    ///
+    /// For example `<expr> BETWEEN <low> AND <high>`
+    ///
+    /// Default is 20.
     fn prec_between(&self) -> u8 {
         BETWEEN_PREC
     }

+    /// Return the precedence of the `LIKE` operator.
+    ///
+    /// Default is 19.
     fn prec_like(&self) -> u8 {
         LIKE_PREC
     }

+    /// Return the precedence of the unary `NOT` operator.
+    ///
+    /// For example `NOT (a OR b)`
+    ///
+    /// Default is 15.
     fn prec_unary_not(&self) -> u8 {
         UNARY_NOT_PREC
     }

+    /// Return the default (unknown) precedence.
+    ///
+    /// Default is 0.
     fn prec_unknown(&self) -> u8 {
         UNKNOWN_PREC
     }

From dfb8b81630ec7285c7ffc9e9113105ef1af56023 Mon Sep 17 00:00:00 2001
From: hulk
Date: Thu, 8 Aug 2024 02:02:11 +0800
Subject: [PATCH 23/57] Add support of ATTACH/DETACH PARTITION for ClickHouse (#1362)

---
 src/ast/ddl.rs                | 25 ++++++++++++++
 src/keywords.rs               |  1 +
 src/parser/mod.rs             | 24 ++++++++++++-
 tests/sqlparser_clickhouse.rs | 65 +++++++++++++++++++++++++++++++++++
 4 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs
index af679d469..d207f5766 100644
--- a/src/ast/ddl.rs
+++ b/src/ast/ddl.rs
@@ -72,6 +72,21 @@ pub enum AlterTableOperation {
         if_exists: bool,
         cascade: bool,
     },
+    /// `ATTACH PART|PARTITION <partition_expr>`
+    /// Note: this is a ClickHouse-specific operation, please refer to
+    /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#attach-partitionpart)
+    AttachPartition {
+        // PART is not a short form of PARTITION, it's a separate keyword
+        // which represents a physical file on disk and partition is a logical entity.
+        partition: Partition,
+    },
+    /// `DETACH PART|PARTITION <partition_expr>`
+    /// Note: this is a ClickHouse-specific operation, please refer to
+    /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#detach-partitionpart)
+    DetachPartition {
+        // See `AttachPartition` for more details
+        partition: Partition,
+    },
     /// `DROP PRIMARY KEY`
     ///
     /// Note: this is a MySQL-specific operation.
@@ -272,6 +287,12 @@ impl fmt::Display for AlterTableOperation {
                 column_name,
                 if *cascade { " CASCADE" } else { "" }
             ),
+            AlterTableOperation::AttachPartition { partition } => {
+                write!(f, "ATTACH {partition}")
+            }
+            AlterTableOperation::DetachPartition { partition } => {
+                write!(f, "DETACH {partition}")
+            }
             AlterTableOperation::EnableAlwaysRule { name } => {
                 write!(f, "ENABLE ALWAYS RULE {name}")
             }
@@ -1305,6 +1326,9 @@ impl fmt::Display for UserDefinedTypeCompositeAttributeDef {
 pub enum Partition {
     Identifier(Ident),
     Expr(Expr),
+    /// ClickHouse supports PART expr which represents physical partition in disk.
+    /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#attach-partitionpart)
+    Part(Expr),
     Partitions(Vec<Partition>),
 }

@@ -1313,6 +1337,7 @@ impl fmt::Display for Partition {
         match self {
             Partition::Identifier(id) => write!(f, "PARTITION ID {id}"),
             Partition::Expr(expr) => write!(f, "PARTITION {expr}"),
+            Partition::Part(expr) => write!(f, "PART {expr}"),
             Partition::Partitions(partitions) => {
                 write!(f, "PARTITION ({})", display_comma_separated(partitions))
             }

diff --git a/src/keywords.rs b/src/keywords.rs
index 49bd969af..c175da874 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -539,6 +539,7 @@ define_keywords!(
     PARALLEL,
     PARAMETER,
     PARQUET,
+    PART,
     PARTITION,
     PARTITIONED,
     PARTITIONS,

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 1fdba5ecf..b6d4c307f 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -6432,7 +6432,7 @@ impl<'a> Parser<'a> {
         } else if dialect_of!(self is PostgreSqlDialect | GenericDialect)
             && self.parse_keywords(&[Keyword::OWNER, Keyword::TO])
         {
-            let new_owner = match self.parse_one_of_keywords( &[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) {
+            let new_owner = match self.parse_one_of_keywords(&[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) {
                 Some(Keyword::CURRENT_USER) => Owner::CurrentUser,
                 Some(Keyword::CURRENT_ROLE) => Owner::CurrentRole,
                 Some(Keyword::SESSION_USER) => Owner::SessionUser,
@@ -6448,6 +6448,18 @@ impl<'a> Parser<'a> {
             };
             AlterTableOperation::OwnerTo { new_owner }
+        } else if dialect_of!(self is ClickHouseDialect|GenericDialect)
+            && self.parse_keyword(Keyword::ATTACH)
+        {
+            AlterTableOperation::AttachPartition {
+                partition: self.parse_part_or_partition()?,
+            }
+        } else if dialect_of!(self is ClickHouseDialect|GenericDialect)
+            && self.parse_keyword(Keyword::DETACH)
+        {
+            AlterTableOperation::DetachPartition {
+                partition: self.parse_part_or_partition()?,
+            }
         } else {
             let options: Vec<SqlOption> =
                 self.parse_options_with_keywords(&[Keyword::SET, Keyword::TBLPROPERTIES])?;
@@ -6465,6 +6477,16 @@ impl<'a> Parser<'a> {
         Ok(operation)
     }

+    fn parse_part_or_partition(&mut self) -> Result<Partition, ParserError> {
+        let keyword = self.expect_one_of_keywords(&[Keyword::PART, Keyword::PARTITION])?;
+        match keyword {
+            Keyword::PART => Ok(Partition::Part(self.parse_expr()?)),
+            Keyword::PARTITION => Ok(Partition::Expr(self.parse_expr()?)),
+            // unreachable because expect_one_of_keywords used above
+            _ => unreachable!(),
+        }
+    }
+
     pub fn parse_alter(&mut self) -> Result<Statement, ParserError> {
         let object_type = self.expect_one_of_keywords(&[
             Keyword::VIEW,
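With the `PART` keyword and the `parse_part_or_partition` helper in place, the ClickHouse and generic dialects round-trip statements like these (table and part names are illustrative; the tests below exercise the same grammar):

```sql
ALTER TABLE visits ATTACH PARTITION 201901;
ALTER TABLE visits DETACH PART 'all_2_2_0';
```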
diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs
index 4108958fb..4676e6e50 100644
--- a/tests/sqlparser_clickhouse.rs
+++ b/tests/sqlparser_clickhouse.rs
@@ -222,6 +222,71 @@ fn parse_create_table() {
     );
 }

+#[test]
+fn parse_alter_table_attach_and_detach_partition() {
+    for operation in &["ATTACH", "DETACH"] {
+        match clickhouse_and_generic()
+            .verified_stmt(format!("ALTER TABLE t0 {operation} PARTITION part").as_str())
+        {
+            Statement::AlterTable {
+                name, operations, ..
+            } => {
+                pretty_assertions::assert_eq!("t0", name.to_string());
+                pretty_assertions::assert_eq!(
+                    operations[0],
+                    if operation == &"ATTACH" {
+                        AlterTableOperation::AttachPartition {
+                            partition: Partition::Expr(Identifier(Ident::new("part"))),
+                        }
+                    } else {
+                        AlterTableOperation::DetachPartition {
+                            partition: Partition::Expr(Identifier(Ident::new("part"))),
+                        }
+                    }
+                );
+            }
+            _ => unreachable!(),
+        }
+
+        match clickhouse_and_generic()
+            .verified_stmt(format!("ALTER TABLE t1 {operation} PART part").as_str())
+        {
+            Statement::AlterTable {
+                name, operations, ..
+            } => {
+                pretty_assertions::assert_eq!("t1", name.to_string());
+                pretty_assertions::assert_eq!(
+                    operations[0],
+                    if operation == &"ATTACH" {
+                        AlterTableOperation::AttachPartition {
+                            partition: Partition::Part(Identifier(Ident::new("part"))),
+                        }
+                    } else {
+                        AlterTableOperation::DetachPartition {
+                            partition: Partition::Part(Identifier(Ident::new("part"))),
+                        }
+                    }
+                );
+            }
+            _ => unreachable!(),
+        }
+
+        // negative cases
+        assert_eq!(
+            clickhouse_and_generic()
+                .parse_sql_statements(format!("ALTER TABLE t0 {operation} PARTITION").as_str())
+                .unwrap_err(),
+            ParserError("Expected: an expression:, found: EOF".to_string())
+        );
+        assert_eq!(
+            clickhouse_and_generic()
+                .parse_sql_statements(format!("ALTER TABLE t0 {operation} PART").as_str())
+                .unwrap_err(),
+            ParserError("Expected: an expression:, found: EOF".to_string())
+        );
+    }
+}
+
 #[test]
 fn parse_optimize_table() {
     clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0");

From 2d4b9b3e5683c8f415ecd35a9b78a3a0f8b2fcb8 Mon Sep 17 00:00:00 2001
From: Jesse
Date: Wed, 7 Aug 2024 20:30:01 +0200
Subject: [PATCH 24/57] Make `Parser::maybe_parse` pub (#1364)

---
 src/parser/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index b6d4c307f..9b252ce29 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -3383,7 +3383,7 @@ impl<'a> Parser<'a> {

     /// Run a parser method `f`, reverting back to the current position if unsuccessful.
     #[must_use]
-    fn maybe_parse<T, F>(&mut self, mut f: F) -> Option<T>
+    pub fn maybe_parse<T, F>(&mut self, mut f: F) -> Option<T>
     where
         F: FnMut(&mut Parser) -> Result<T, ParserError>,
     {

From 68a04cd40218bf5a3244c6574f091bde344f6d12 Mon Sep 17 00:00:00 2001
From: hulk
Date: Fri, 9 Aug 2024 04:57:21 +0800
Subject: [PATCH 25/57] Update version of GitHub Actions (#1363)

---
 .github/workflows/rust.yml | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 64c4d114a..1d2c34276 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -8,31 +8,31 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Set up Rust
-        uses: hecrj/setup-rust-action@v1
+        uses: hecrj/setup-rust-action@v2
         with:
           components: rustfmt
           # Note that `nightly` is required for `license_template_path`, as
           # it's an unstable feature.
rust-version: nightly - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - run: cargo +nightly fmt -- --check --config-path <(echo 'license_template_path = "HEADER"') lint: runs-on: ubuntu-latest steps: - name: Set up Rust - uses: hecrj/setup-rust-action@v1 + uses: hecrj/setup-rust-action@v2 with: components: clippy - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - run: cargo clippy --all-targets --all-features -- -D warnings compile: runs-on: ubuntu-latest steps: - name: Set up Rust - uses: hecrj/setup-rust-action@v1 - - uses: actions/checkout@master + uses: hecrj/setup-rust-action@v2 + - uses: actions/checkout@v4 - run: cargo check --all-targets --all-features docs: @@ -41,18 +41,18 @@ jobs: RUSTDOCFLAGS: "-Dwarnings" steps: - name: Set up Rust - uses: hecrj/setup-rust-action@v1 - - uses: actions/checkout@master + uses: hecrj/setup-rust-action@v2 + - uses: actions/checkout@v4 - run: cargo doc --document-private-items --no-deps --workspace --all-features compile-no-std: runs-on: ubuntu-latest steps: - name: Set up Rust - uses: hecrj/setup-rust-action@v1 + uses: hecrj/setup-rust-action@v2 with: targets: 'thumbv6m-none-eabi' - - uses: actions/checkout@master + - uses: actions/checkout@v4 - run: cargo check --no-default-features --target thumbv6m-none-eabi test: @@ -62,7 +62,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Setup Rust - uses: hecrj/setup-rust-action@v1 + uses: hecrj/setup-rust-action@v2 with: rust-version: ${{ matrix.rust }} - name: Install Tarpaulin @@ -72,7 +72,7 @@ jobs: version: 0.14.2 use-tool-cache: true - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Test run: cargo test --all-features @@ -80,7 +80,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Setup Rust - uses: hecrj/setup-rust-action@v1 + uses: hecrj/setup-rust-action@v2 with: rust-version: stable - name: Install Tarpaulin @@ -90,7 +90,7 @@ jobs: version: 0.14.2 use-tool-cache: true - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Coverage run: cargo tarpaulin -o Lcov --output-dir ./coverage - name: Coveralls @@ -104,8 +104,8 @@ jobs: needs: [test] steps: - name: Set up Rust - uses: hecrj/setup-rust-action@v1 - - uses: actions/checkout@v2 + uses: hecrj/setup-rust-action@v2 + - uses: actions/checkout@v4 - name: Publish shell: bash run: | From 1e209d87415a5adfedccac8cee3e2860122e4acb Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 8 Aug 2024 16:58:31 -0400 Subject: [PATCH 26/57] Simplify arrow_cast tests (#1367) --- tests/sqlparser_postgres.rs | 120 +++++++++--------------------------- 1 file changed, 29 insertions(+), 91 deletions(-) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 150f06913..f370748d2 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4479,112 +4479,50 @@ fn test_unicode_string_literal() { fn check_arrow_precedence(sql: &str, arrow_operator: BinaryOperator) { assert_eq!( - pg().verified_stmt(sql), - Statement::Query(Box::new(Query { - with: None, - body: Box::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp { - left: Box::new(Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident { - value: "foo".to_string(), - quote_style: None, - })), - op: arrow_operator, - right: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))), - }), - op: BinaryOperator::Eq, - right: Box::new(Expr::Value(Value::SingleQuotedString("spam".to_string()))), - })], - into: None, - from: 
vec![], - lateral_views: vec![], - prewhere: None, - selection: None, - group_by: GroupByExpr::Expressions(vec![], vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - window_before_qualify: false, - value_table_mode: None, - connect_by: None, - }))), - order_by: None, - limit: None, - limit_by: vec![], - offset: None, - fetch: None, - locks: vec![], - for_clause: None, - settings: None, - format_clause: None, - })) + pg().verified_expr(sql), + Expr::BinaryOp { + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "foo".to_string(), + quote_style: None, + })), + op: arrow_operator, + right: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))), + }), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::SingleQuotedString("spam".to_string()))), + } ) } #[test] fn arrow_precedence() { - check_arrow_precedence("SELECT foo -> 'bar' = 'spam'", BinaryOperator::Arrow); + check_arrow_precedence("foo -> 'bar' = 'spam'", BinaryOperator::Arrow); } #[test] fn long_arrow_precedence() { - check_arrow_precedence("SELECT foo ->> 'bar' = 'spam'", BinaryOperator::LongArrow); + check_arrow_precedence("foo ->> 'bar' = 'spam'", BinaryOperator::LongArrow); } #[test] fn arrow_cast_precedence() { // check this matches postgres where you would need `(foo -> 'bar')::TEXT` - let stmt = pg().verified_stmt("SELECT foo -> 'bar'::TEXT"); + let stmt = pg().verified_expr("foo -> 'bar'::TEXT"); assert_eq!( stmt, - Statement::Query(Box::new(Query { - with: None, - body: Box::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident { - value: "foo".to_string(), - quote_style: None, - })), - op: BinaryOperator::Arrow, - right: Box::new(Expr::Cast { - kind: CastKind::DoubleColon, - expr: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))), - data_type: DataType::Text, - format: None, - }), - })], - into: None, - from: vec![], - lateral_views: vec![], - prewhere: None, - selection: None, - group_by: GroupByExpr::Expressions(vec![], vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - window_before_qualify: false, - value_table_mode: None, - connect_by: None, - }))), - order_by: None, - limit: None, - limit_by: vec![], - offset: None, - fetch: None, - locks: vec![], - for_clause: None, - settings: None, - format_clause: None, - })) + Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "foo".to_string(), + quote_style: None, + })), + op: BinaryOperator::Arrow, + right: Box::new(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))), + data_type: DataType::Text, + format: None, + }), + } ) } From ca5262c13f5b7587c1700f26c97e147676981f6e Mon Sep 17 00:00:00 2001 From: hulk Date: Tue, 13 Aug 2024 18:59:19 +0800 Subject: [PATCH 27/57] Use the local GitHub Action to replace setup-rust-action (#1371) --- .github/actions/setup-builder/action.yaml | 42 ++++++++++++++++++++ .github/workflows/rust.yml | 47 +++++++++++------------ 2 files changed, 64 insertions(+), 25 deletions(-) create mode 100644 .github/actions/setup-builder/action.yaml diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml new file mode 100644 index 000000000..61faa055b --- /dev/null +++ 
b/.github/actions/setup-builder/action.yaml @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Prepare Rust Builder +description: 'Prepare Rust Build Environment' +inputs: + rust-version: + description: 'version of rust to install (e.g. stable)' + required: true + default: 'stable' + targets: + description: 'The toolchain targets to add, comma-separated' + default: '' + +runs: + using: "composite" + steps: + - name: Setup Rust Toolchain + shell: bash + run: | + echo "Installing ${{ inputs.rust-version }}" + if [ -n "${{ inputs.targets}}" ]; then + rustup toolchain install ${{ inputs.rust-version }} -t ${{ inputs.targets }} + else + rustup toolchain install ${{ inputs.rust-version }} + fi + rustup default ${{ inputs.rust-version }} + rustup component add rustfmt clippy diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 1d2c34276..146ea3120 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -7,32 +7,29 @@ jobs: codestyle: runs-on: ubuntu-latest steps: - - name: Set up Rust - uses: hecrj/setup-rust-action@v2 + - uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder with: - components: rustfmt # Note that `nightly` is required for `license_template_path`, as # it's an unstable feature. 
rust-version: nightly - - uses: actions/checkout@v4 - run: cargo +nightly fmt -- --check --config-path <(echo 'license_template_path = "HEADER"') lint: runs-on: ubuntu-latest steps: - - name: Set up Rust - uses: hecrj/setup-rust-action@v2 - with: - components: clippy - uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder - run: cargo clippy --all-targets --all-features -- -D warnings compile: runs-on: ubuntu-latest steps: - - name: Set up Rust - uses: hecrj/setup-rust-action@v2 - uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder - run: cargo check --all-targets --all-features docs: @@ -40,19 +37,19 @@ jobs: env: RUSTDOCFLAGS: "-Dwarnings" steps: - - name: Set up Rust - uses: hecrj/setup-rust-action@v2 - uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder - run: cargo doc --document-private-items --no-deps --workspace --all-features compile-no-std: runs-on: ubuntu-latest steps: - - name: Set up Rust - uses: hecrj/setup-rust-action@v2 + - uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder with: targets: 'thumbv6m-none-eabi' - - uses: actions/checkout@v4 - run: cargo check --no-default-features --target thumbv6m-none-eabi test: @@ -61,8 +58,10 @@ jobs: rust: [stable, beta, nightly] runs-on: ubuntu-latest steps: - - name: Setup Rust - uses: hecrj/setup-rust-action@v2 + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder with: rust-version: ${{ matrix.rust }} - name: Install Tarpaulin @@ -71,16 +70,16 @@ jobs: crate: cargo-tarpaulin version: 0.14.2 use-tool-cache: true - - name: Checkout - uses: actions/checkout@v4 - name: Test run: cargo test --all-features test-coverage: runs-on: ubuntu-latest steps: - - name: Setup Rust - uses: hecrj/setup-rust-action@v2 + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder with: rust-version: stable - name: Install Tarpaulin @@ -89,8 +88,6 @@ jobs: crate: cargo-tarpaulin version: 0.14.2 use-tool-cache: true - - name: Checkout - uses: actions/checkout@v4 - name: Coverage run: cargo tarpaulin -o Lcov --output-dir ./coverage - name: Coveralls @@ -103,9 +100,9 @@ jobs: runs-on: ubuntu-latest needs: [test] steps: - - name: Set up Rust - uses: hecrj/setup-rust-action@v2 - uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder - name: Publish shell: bash run: | From f5b818e74b8364fe2ffac70e3b3d13167b808215 Mon Sep 17 00:00:00 2001 From: Seve Martinez <20816697+seve-martinez@users.noreply.github.com> Date: Tue, 13 Aug 2024 05:56:18 -0700 Subject: [PATCH 28/57] supporting snowflake extract syntax (#1374) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 29 +++++++++++++++++++++++++++-- src/parser/mod.rs | 21 ++++++++++++++++++++- tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_common.rs | 1 + tests/sqlparser_snowflake.rs | 29 +++++++++++++++++++++++++++++ 5 files changed, 78 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e0c929a9d..86e2592a3 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -477,6 +477,22 @@ pub enum CastKind { DoubleColon, } +/// `EXTRACT` syntax variants. +/// +/// In Snowflake dialect, the `EXTRACT` expression can support either the `from` syntax +/// or the comma syntax. 
+///
+/// See <https://docs.snowflake.com/en/sql-reference/functions/extract>
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum ExtractSyntax {
+    /// `EXTRACT(DateTimeField FROM <expr>)`
+    From,
+    /// `EXTRACT(DateTimeField, <expr>)`
+    Comma,
+}
+
 /// An SQL expression of any type.
 ///
 /// The parser does not distinguish between expressions of different types
@@ -637,13 +653,15 @@ pub enum Expr {
         time_zone: Box<Expr>,
     },
     /// Extract a field from a timestamp e.g. `EXTRACT(MONTH FROM foo)`
+    /// Or `EXTRACT(MONTH, foo)`
     ///
     /// Syntax:
     /// ```sql
-    /// EXTRACT(DateTimeField FROM <expr>)
+    /// EXTRACT(DateTimeField FROM <expr>) | EXTRACT(DateTimeField, <expr>)
     /// ```
     Extract {
         field: DateTimeField,
+        syntax: ExtractSyntax,
         expr: Box<Expr>,
     },
     /// ```sql
@@ -1197,7 +1215,14 @@ impl fmt::Display for Expr {
                     write!(f, "{expr}::{data_type}")
                 }
             },
-            Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"),
+            Expr::Extract {
+                field,
+                syntax,
+                expr,
+            } => match syntax {
+                ExtractSyntax::From => write!(f, "EXTRACT({field} FROM {expr})"),
+                ExtractSyntax::Comma => write!(f, "EXTRACT({field}, {expr})"),
+            },
             Expr::Ceil { expr, field } => {
                 if field == &DateTimeField::NoDateTime {
                     write!(f, "CEIL({expr})")

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 9b252ce29..60a7b4d0b 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1682,12 +1682,25 @@ impl<'a> Parser<'a> {
     pub fn parse_extract_expr(&mut self) -> Result<Expr, ParserError> {
         self.expect_token(&Token::LParen)?;
         let field = self.parse_date_time_field()?;
-        self.expect_keyword(Keyword::FROM)?;
+
+        let syntax = if self.parse_keyword(Keyword::FROM) {
+            ExtractSyntax::From
+        } else if self.consume_token(&Token::Comma)
+            && dialect_of!(self is SnowflakeDialect | GenericDialect)
+        {
+            ExtractSyntax::Comma
+        } else {
+            return Err(ParserError::ParserError(
+                "Expected 'FROM' or ','".to_string(),
+            ));
+        };
+
         let expr = self.parse_expr()?;
         self.expect_token(&Token::RParen)?;
         Ok(Expr::Extract {
             field,
             expr: Box::new(expr),
+            syntax,
         })
     }
@@ -1950,6 +1963,12 @@ impl<'a> Parser<'a> {
             }
             _ => self.expected("date/time field", next_token),
         },
+        Token::SingleQuotedString(_) if dialect_of!(self is SnowflakeDialect | GenericDialect) =>
+        {
+            self.prev_token();
+            let custom = self.parse_identifier(false)?;
+            Ok(DateTimeField::Custom(custom))
+        }
         _ => self.expected("date/time field", next_token),
     }
 }
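Both spellings of `EXTRACT` now parse in the Snowflake and generic dialects; the forms below correspond to the tests that follow (the `d` column is illustrative):

```sql
SELECT EXTRACT(YEAR FROM d);   -- ExtractSyntax::From
SELECT EXTRACT(HOUR, d);       -- ExtractSyntax::Comma
SELECT EXTRACT('hour', d);     -- quoted custom date/time part
```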
diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs
index a0dd5a662..134c8ddad 100644
--- a/tests/sqlparser_bigquery.rs
+++ b/tests/sqlparser_bigquery.rs
@@ -2136,6 +2136,7 @@ fn parse_extract_weekday() {
     assert_eq!(
         &Expr::Extract {
             field: DateTimeField::Week(Some(Ident::new("MONDAY"))),
+            syntax: ExtractSyntax::From,
             expr: Box::new(Expr::Identifier(Ident::new("d"))),
         },
         expr_from_projection(only(&select.projection)),

diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 7ec017269..293269cdd 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -2430,6 +2430,7 @@ fn parse_extract() {
     assert_eq!(
         &Expr::Extract {
             field: DateTimeField::Year,
+            syntax: ExtractSyntax::From,
             expr: Box::new(Expr::Identifier(Ident::new("d"))),
         },
         expr_from_projection(only(&select.projection)),

diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs
index eaf8c1d14..a331c7df9 100644
--- a/tests/sqlparser_snowflake.rs
+++ b/tests/sqlparser_snowflake.rs
@@ -2019,6 +2019,35 @@ fn parse_extract_custom_part() {
     assert_eq!(
         &Expr::Extract {
             field: DateTimeField::Custom(Ident::new("eod")),
+            syntax: ExtractSyntax::From,
             expr: Box::new(Expr::Identifier(Ident::new("d"))),
         },
         expr_from_projection(only(&select.projection)),
     );
 }
+
+#[test]
+fn parse_extract_comma() {
+    let sql = "SELECT EXTRACT(HOUR, d)";
+    let select = snowflake_and_generic().verified_only_select(sql);
+    assert_eq!(
+        &Expr::Extract {
+            field: DateTimeField::Hour,
+            syntax: ExtractSyntax::Comma,
+            expr: Box::new(Expr::Identifier(Ident::new("d"))),
+        },
+        expr_from_projection(only(&select.projection)),
+    );
+}
+
+#[test]
+fn parse_extract_comma_quoted() {
+    let sql = "SELECT EXTRACT('hour', d)";
+    let select = snowflake_and_generic().verified_only_select(sql);
+    assert_eq!(
+        &Expr::Extract {
+            field: DateTimeField::Custom(Ident::with_quote('\'', "hour")),
+            syntax: ExtractSyntax::Comma,
+            expr: Box::new(Expr::Identifier(Ident::new("d"))),
+        },
+        expr_from_projection(only(&select.projection)),
+    );
+}

From b072ce2589a16a850b456223979f75b799aaf7aa Mon Sep 17 00:00:00 2001
From: Luca Cappelletti
Date: Wed, 14 Aug 2024 15:11:16 +0200
Subject: [PATCH 29/57] Adding support for parsing CREATE TRIGGER and DROP
 TRIGGER statements (#1352)

Co-authored-by: hulk
Co-authored-by: Ifeanyi Ubah
Co-authored-by: Andrew Lamb
---
 src/ast/data_type.rs        |   5 +
 src/ast/ddl.rs              |   2 +-
 src/ast/mod.rs              | 172 ++++++++++++-
 src/ast/trigger.rs          | 158 ++++++++++++
 src/keywords.rs             |   5 +-
 src/parser/mod.rs           | 216 +++++++++++++++-
 src/test_utils.rs           |   1 +
 tests/sqlparser_postgres.rs | 488 +++++++++++++++++++++++++++++++++++-
 8 files changed, 1022 insertions(+), 25 deletions(-)
 create mode 100644 src/ast/trigger.rs

diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs
index e6477f56b..ff2a3ad04 100644
--- a/src/ast/data_type.rs
+++ b/src/ast/data_type.rs
@@ -319,6 +319,10 @@ pub enum DataType {
     /// [`SQLiteDialect`](crate::dialect::SQLiteDialect), from statements such
     /// as `CREATE TABLE t1 (a)`.
     Unspecified,
+    /// Trigger data type, returned by functions associated with triggers
+    ///
+    /// [postgresql]: https://www.postgresql.org/docs/current/plpgsql-trigger.html
+    Trigger,
 }

 impl fmt::Display for DataType {
@@ -543,6 +547,7 @@ impl fmt::Display for DataType {
             write!(f, "Nested({})", display_comma_separated(fields))
         }
         DataType::Unspecified => Ok(()),
+        DataType::Trigger => write!(f, "TRIGGER"),
     }
 }
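The new `DataType::Trigger` variant is what lets `CREATE FUNCTION ... RETURNS trigger` parse; a trimmed sketch of the Postgres idiom (the full version appears in `parse_trigger_related_functions` near the end of this patch):

```sql
CREATE FUNCTION emp_stamp() RETURNS trigger AS $emp_stamp$
    -- trigger body elided
$emp_stamp$ LANGUAGE plpgsql;
```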
diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs
index d207f5766..bebd98604 100644
--- a/src/ast/ddl.rs
+++ b/src/ast/ddl.rs
@@ -1175,7 +1175,7 @@ fn display_option_spaced
 /// `<constraint_characteristics> = [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ] [ ENFORCED | NOT ENFORCED ]`
 ///
 /// Used in UNIQUE and foreign key constraints. The individual settings may occur in any order.
-#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Default, Eq, Ord, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
 pub struct ConstraintCharacteristics {

diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 86e2592a3..ae0522ccc 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -53,6 +53,12 @@ pub use self::query::{
     TableAlias, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins, Top, TopQuantity,
     ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill,
 };
+
+pub use self::trigger::{
+    TriggerEvent, TriggerExecBody, TriggerExecBodyType, TriggerObject, TriggerPeriod,
+    TriggerReferencing, TriggerReferencingType,
+};
+
 pub use self::value::{
     escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
     TrimWhereField, Value,
@@ -71,6 +77,7 @@ mod dml;
 pub mod helpers;
 mod operator;
 mod query;
+mod trigger;
 mod value;

 #[cfg(feature = "visitor")]
@@ -2282,7 +2289,7 @@ pub enum Statement {
     DropFunction {
         if_exists: bool,
         /// One or more functions to drop
-        func_desc: Vec<DropFunctionDesc>,
+        func_desc: Vec<FunctionDesc>,
         /// `CASCADE` or `RESTRICT`
         option: Option<ReferentialAction>,
     },
@@ -2292,7 +2299,7 @@ pub enum Statement {
     DropProcedure {
         if_exists: bool,
         /// One or more procedures to drop
-        proc_desc: Vec<DropFunctionDesc>,
+        proc_desc: Vec<FunctionDesc>,
         /// `CASCADE` or `RESTRICT`
         option: Option<ReferentialAction>,
     },
@@ -2618,6 +2625,96 @@ pub enum Statement {
         /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_a_remote_function)
         remote_connection: Option<ObjectName>,
     },
+    /// CREATE TRIGGER
+    ///
+    /// Examples:
+    ///
+    /// ```sql
+    /// CREATE TRIGGER trigger_name
+    /// BEFORE INSERT ON table_name
+    /// FOR EACH ROW
+    /// EXECUTE FUNCTION trigger_function();
+    /// ```
+    ///
+    /// Postgres: <https://www.postgresql.org/docs/current/sql-createtrigger.html>
+    CreateTrigger {
+        /// The `OR REPLACE` clause is used to re-create the trigger if it already exists.
+        ///
+        /// Example:
+        /// ```sql
+        /// CREATE OR REPLACE TRIGGER trigger_name
+        /// AFTER INSERT ON table_name
+        /// FOR EACH ROW
+        /// EXECUTE FUNCTION trigger_function();
+        /// ```
+        or_replace: bool,
+        /// The `CONSTRAINT` keyword is used to create a trigger as a constraint.
+        is_constraint: bool,
+        /// The name of the trigger to be created.
+        name: ObjectName,
+        /// Determines whether the function is called before, after, or instead of the event.
+        ///
+        /// Example of BEFORE:
+        ///
+        /// ```sql
+        /// CREATE TRIGGER trigger_name
+        /// BEFORE INSERT ON table_name
+        /// FOR EACH ROW
+        /// EXECUTE FUNCTION trigger_function();
+        /// ```
+        ///
+        /// Example of AFTER:
+        ///
+        /// ```sql
+        /// CREATE TRIGGER trigger_name
+        /// AFTER INSERT ON table_name
+        /// FOR EACH ROW
+        /// EXECUTE FUNCTION trigger_function();
+        /// ```
+        ///
+        /// Example of INSTEAD OF:
+        ///
+        /// ```sql
+        /// CREATE TRIGGER trigger_name
+        /// INSTEAD OF INSERT ON table_name
+        /// FOR EACH ROW
+        /// EXECUTE FUNCTION trigger_function();
+        /// ```
+        period: TriggerPeriod,
+        /// Multiple events can be specified using OR, such as `INSERT`, `UPDATE`, `DELETE`, or `TRUNCATE`.
+        events: Vec<TriggerEvent>,
+        /// The table on which the trigger is to be created.
+        table_name: ObjectName,
+        /// The optional referenced table name that can be referenced via
+        /// the `FROM` keyword.
+        referenced_table_name: Option<ObjectName>,
+        /// This keyword immediately precedes the declaration of one or two relation names that
+        /// provide access to the transition relations of the triggering statement.
+        referencing: Vec<TriggerReferencing>,
+        /// This specifies whether the trigger function should be fired once for
+        /// every row affected by the trigger event, or just once per SQL statement.
+        trigger_object: TriggerObject,
+        /// Whether to include the `EACH` term of the `FOR EACH`, as it is optional syntax.
+        include_each: bool,
+        /// Triggering conditions
+        condition: Option<Expr>,
+        /// Execute logic block
+        exec_body: TriggerExecBody,
+        /// The characteristics of the trigger, which include whether the trigger is
+        /// `DEFERRABLE`, `INITIALLY DEFERRED`, or `INITIALLY IMMEDIATE`.
+        characteristics: Option<ConstraintCharacteristics>,
+    },
+    /// DROP TRIGGER
+    ///
+    /// ```sql
+    /// DROP TRIGGER [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ]
+    /// ```
+    ///
+    DropTrigger {
+        if_exists: bool,
+        trigger_name: ObjectName,
+        table_name: ObjectName,
+        /// `CASCADE` or `RESTRICT`
+        option: Option<ReferentialAction>,
+    },
     /// ```sql
     /// CREATE PROCEDURE
     /// ```
@@ -3394,6 +3491,71 @@ impl fmt::Display for Statement {
             }
             Ok(())
         }
+        Statement::CreateTrigger {
+            or_replace,
+            is_constraint,
+            name,
+            period,
+            events,
+            table_name,
+            referenced_table_name,
+            referencing,
+            trigger_object,
+            condition,
+            include_each,
+            exec_body,
+            characteristics,
+        } => {
+            write!(
+                f,
+                "CREATE {or_replace}{is_constraint}TRIGGER {name} {period}",
+                or_replace = if *or_replace { "OR REPLACE " } else { "" },
+                is_constraint = if *is_constraint { "CONSTRAINT " } else { "" },
+            )?;
+
+            if !events.is_empty() {
+                write!(f, " {}", display_separated(events, " OR "))?;
+            }
+            write!(f, " ON {table_name}")?;
+
+            if let Some(referenced_table_name) = referenced_table_name {
+                write!(f, " FROM {referenced_table_name}")?;
+            }
+
+            if let Some(characteristics) = characteristics {
+                write!(f, " {characteristics}")?;
+            }
+
+            if !referencing.is_empty() {
+                write!(f, " REFERENCING {}", display_separated(referencing, " "))?;
+            }
+
+            if *include_each {
+                write!(f, " FOR EACH {trigger_object}")?;
+            } else {
+                write!(f, " FOR {trigger_object}")?;
+            }
+            if let Some(condition) = condition {
+                write!(f, " WHEN {condition}")?;
+            }
+            write!(f, " EXECUTE {exec_body}")
+        }
+        Statement::DropTrigger {
+            if_exists,
+            trigger_name,
+            table_name,
+            option,
+        } => {
+            write!(f, "DROP TRIGGER")?;
+            if *if_exists {
+                write!(f, " IF EXISTS")?;
+            }
+            write!(f, " {trigger_name} ON {table_name}")?;
+            if let Some(option) = option {
+                write!(f, " {option}")?;
+            }
+            Ok(())
+        }
         Statement::CreateProcedure {
             name,
             or_alter,
@@ -6026,16 +6188,16 @@ impl fmt::Display for DropFunctionOption {
     }
 }

-/// Function describe in DROP FUNCTION.
+/// Generic function description for DROP FUNCTION and CREATE TRIGGER.
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
-pub struct DropFunctionDesc {
+pub struct FunctionDesc {
     pub name: ObjectName,
     pub args: Option<Vec<OperateFunctionArg>>,
 }

-impl fmt::Display for DropFunctionDesc {
+impl fmt::Display for FunctionDesc {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "{}", self.name)?;
         if let Some(args) = &self.args {
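For reference, the `Display` impls above render the new statements back to SQL in the shapes the round-trip tests expect, e.g.:

```sql
CREATE CONSTRAINT TRIGGER check_multiple_events BEFORE INSERT OR UPDATE OR DELETE ON accounts DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION check_account_changes
```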
diff --git a/src/ast/trigger.rs b/src/ast/trigger.rs
new file mode 100644
index 000000000..a0913db94
--- /dev/null
+++ b/src/ast/trigger.rs
@@ -0,0 +1,158 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! SQL Abstract Syntax Tree (AST) for triggers.
+use super::*;
+
+/// This specifies whether the trigger function should be fired once for every row affected by the trigger event, or just once per SQL statement.
+#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum TriggerObject {
+    Row,
+    Statement,
+}
+
+impl fmt::Display for TriggerObject {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            TriggerObject::Row => write!(f, "ROW"),
+            TriggerObject::Statement => write!(f, "STATEMENT"),
+        }
+    }
+}
+
+/// This clause indicates whether the following relation name is for the before-image transition relation or the after-image transition relation
+#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum TriggerReferencingType {
+    OldTable,
+    NewTable,
+}
+
+impl fmt::Display for TriggerReferencingType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            TriggerReferencingType::OldTable => write!(f, "OLD TABLE"),
+            TriggerReferencingType::NewTable => write!(f, "NEW TABLE"),
+        }
+    }
+}
+
+/// This keyword immediately precedes the declaration of one or two relation names that provide access to the transition relations of the triggering statement
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub struct TriggerReferencing {
+    pub refer_type: TriggerReferencingType,
+    pub is_as: bool,
+    pub transition_relation_name: ObjectName,
+}
+
+impl fmt::Display for TriggerReferencing {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "{refer_type}{is_as} {relation_name}",
+            refer_type = self.refer_type,
+            is_as = if self.is_as { " AS" } else { "" },
+            relation_name = self.transition_relation_name
+        )
+    }
+}
+
+/// Used to describe trigger events
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum TriggerEvent {
+    Insert,
+    Update(Vec<Ident>),
+    Delete,
+    Truncate,
+}
+
+impl fmt::Display for TriggerEvent {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            TriggerEvent::Insert => write!(f, "INSERT"),
+            TriggerEvent::Update(columns) => {
+                write!(f, "UPDATE")?;
+                if !columns.is_empty() {
+                    write!(f, " OF")?;
+                    write!(f, " {}", display_comma_separated(columns))?;
+                }
+                Ok(())
+            }
+            TriggerEvent::Delete => write!(f, "DELETE"),
+            TriggerEvent::Truncate => write!(f, "TRUNCATE"),
+        }
+    }
+}
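A quick check of the `Display` behavior for update events with a column list (a sketch; it relies on the re-exports added to `ast/mod.rs` earlier in this patch):

```rust
use sqlparser::ast::{Ident, TriggerEvent};

fn main() {
    let event = TriggerEvent::Update(vec![Ident::new("balance"), Ident::new("status")]);
    // `UPDATE OF` is only emitted when the column list is non-empty.
    assert_eq!(event.to_string(), "UPDATE OF balance, status");
}
```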
+/// Trigger period
+#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum TriggerPeriod {
+    After,
+    Before,
+    InsteadOf,
+}
+
+impl fmt::Display for TriggerPeriod {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            TriggerPeriod::After => write!(f, "AFTER"),
+            TriggerPeriod::Before => write!(f, "BEFORE"),
+            TriggerPeriod::InsteadOf => write!(f, "INSTEAD OF"),
+        }
+    }
+}
+
+/// Types of trigger execution body.
+#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum TriggerExecBodyType {
+    Function,
+    Procedure,
+}
+
+impl fmt::Display for TriggerExecBodyType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            TriggerExecBodyType::Function => write!(f, "FUNCTION"),
+            TriggerExecBodyType::Procedure => write!(f, "PROCEDURE"),
+        }
+    }
+}
+
+/// The trigger's execute clause: the function or procedure to call when the trigger fires.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub struct TriggerExecBody {
+    pub exec_type: TriggerExecBodyType,
+    pub func_desc: FunctionDesc,
+}
+
+impl fmt::Display for TriggerExecBody {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "{exec_type} {func_desc}",
+            exec_type = self.exec_type,
+            func_desc = self.func_desc
+        )
+    }
+}
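Putting the AST pieces together, they model definitions such as the following (a sketch; table, column, and function names are illustrative):

```sql
CREATE TRIGGER audit_balance
AFTER UPDATE OF balance ON accounts
REFERENCING OLD TABLE AS old_rows NEW TABLE AS new_rows
FOR EACH STATEMENT
EXECUTE FUNCTION log_balance_changes();
```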
diff --git a/src/keywords.rs b/src/keywords.rs
index c175da874..0c9d3dd6c 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -20,7 +20,7 @@
 //! As a matter of fact, most of these keywords are not used at all
 //! and could be removed.
 //! 3) a `RESERVED_FOR_TABLE_ALIAS` array with keywords reserved in a
-//!    "table alias" context.
+//!    "table alias" context.

 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
@@ -108,6 +108,7 @@ define_keywords!(
     AVRO,
     BACKWARD,
     BASE64,
+    BEFORE,
     BEGIN,
     BEGIN_FRAME,
     BEGIN_PARTITION,
@@ -378,6 +379,7 @@ define_keywords!(
     INSENSITIVE,
     INSERT,
     INSTALL,
+    INSTEAD,
     INT,
     INT128,
     INT16,
@@ -683,6 +685,7 @@ define_keywords!(
     STABLE,
     STAGE,
     START,
+    STATEMENT,
     STATIC,
     STATISTICS,
     STATUS,

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 60a7b4d0b..5706df56c 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -3368,6 +3368,25 @@ impl<'a> Parser<'a> {
         Ok(values)
     }

+    /// Parse a keyword-separated list of 1+ items accepted by `F`
+    pub fn parse_keyword_separated<T, F>(
+        &mut self,
+        keyword: Keyword,
+        mut f: F,
+    ) -> Result<Vec<T>, ParserError>
+    where
+        F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
+    {
+        let mut values = vec![];
+        loop {
+            values.push(f(self)?);
+            if !self.parse_keyword(keyword) {
+                break;
+            }
+        }
+        Ok(values)
+    }
+
     pub fn parse_parenthesized<T, F>(&mut self, mut f: F) -> Result<T, ParserError>
     where
         F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
@@ -3471,6 +3490,10 @@ impl<'a> Parser<'a> {
             self.parse_create_external_table(or_replace)
         } else if self.parse_keyword(Keyword::FUNCTION) {
             self.parse_create_function(or_replace, temporary)
+        } else if self.parse_keyword(Keyword::TRIGGER) {
+            self.parse_create_trigger(or_replace, false)
+        } else if self.parse_keywords(&[Keyword::CONSTRAINT, Keyword::TRIGGER]) {
+            self.parse_create_trigger(or_replace, true)
         } else if self.parse_keyword(Keyword::MACRO) {
             self.parse_create_macro(or_replace, temporary)
         } else if self.parse_keyword(Keyword::SECRET) {
@@ -4061,6 +4084,180 @@ impl<'a> Parser<'a> {
         })
     }

+    /// Parse statements of the DropTrigger type such as:
+    ///
+    /// ```sql
+    /// DROP TRIGGER [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ]
+    /// ```
+    pub fn parse_drop_trigger(&mut self) -> Result<Statement, ParserError> {
+        if !dialect_of!(self is PostgreSqlDialect | GenericDialect) {
+            self.prev_token();
+            return self.expected("an object type after DROP", self.peek_token());
+        }
+        let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
+        let trigger_name = self.parse_object_name(false)?;
+        self.expect_keyword(Keyword::ON)?;
+        let table_name = self.parse_object_name(false)?;
+        let option = self
+            .parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT])
+            .map(|keyword| match keyword {
+                Keyword::CASCADE => ReferentialAction::Cascade,
+                Keyword::RESTRICT => ReferentialAction::Restrict,
+                _ => unreachable!(),
+            });
+        Ok(Statement::DropTrigger {
+            if_exists,
+            trigger_name,
+            table_name,
+            option,
+        })
+    }
+
+    pub fn parse_create_trigger(
+        &mut self,
+        or_replace: bool,
+        is_constraint: bool,
+    ) -> Result<Statement, ParserError> {
+        if !dialect_of!(self is PostgreSqlDialect | GenericDialect) {
+            self.prev_token();
+            return self.expected("an object type after CREATE", self.peek_token());
+        }
+
+        let name = self.parse_object_name(false)?;
+        let period = self.parse_trigger_period()?;
+
+        let events = self.parse_keyword_separated(Keyword::OR, Parser::parse_trigger_event)?;
+        self.expect_keyword(Keyword::ON)?;
+        let table_name = self.parse_object_name(false)?;
+
+        let referenced_table_name = if self.parse_keyword(Keyword::FROM) {
+            self.parse_object_name(true).ok()
+        } else {
+            None
+        };
+
+        let characteristics = self.parse_constraint_characteristics()?;
+
+        let mut referencing = vec![];
+        if self.parse_keyword(Keyword::REFERENCING) {
+            while let Some(refer) = self.parse_trigger_referencing()? {
+                referencing.push(refer);
+            }
+        }
+
+        self.expect_keyword(Keyword::FOR)?;
+        let include_each = self.parse_keyword(Keyword::EACH);
+        let trigger_object =
+            match self.expect_one_of_keywords(&[Keyword::ROW, Keyword::STATEMENT])? {
+                Keyword::ROW => TriggerObject::Row,
+                Keyword::STATEMENT => TriggerObject::Statement,
+                _ => unreachable!(),
+            };
+
+        let condition = self
+            .parse_keyword(Keyword::WHEN)
+            .then(|| self.parse_expr())
+            .transpose()?;
+
+        self.expect_keyword(Keyword::EXECUTE)?;
+
+        let exec_body = self.parse_trigger_exec_body()?;
+
+        Ok(Statement::CreateTrigger {
+            or_replace,
+            is_constraint,
+            name,
+            period,
+            events,
+            table_name,
+            referenced_table_name,
+            referencing,
+            trigger_object,
+            include_each,
+            condition,
+            exec_body,
+            characteristics,
+        })
+    }
+
+    pub fn parse_trigger_period(&mut self) -> Result<TriggerPeriod, ParserError> {
+        Ok(
+            match self.expect_one_of_keywords(&[
+                Keyword::BEFORE,
+                Keyword::AFTER,
+                Keyword::INSTEAD,
+            ])? {
+                Keyword::BEFORE => TriggerPeriod::Before,
+                Keyword::AFTER => TriggerPeriod::After,
+                Keyword::INSTEAD => self
+                    .expect_keyword(Keyword::OF)
+                    .map(|_| TriggerPeriod::InsteadOf)?,
+                _ => unreachable!(),
+            },
+        )
+    }
+
+    pub fn parse_trigger_event(&mut self) -> Result<TriggerEvent, ParserError> {
+        Ok(
+            match self.expect_one_of_keywords(&[
+                Keyword::INSERT,
+                Keyword::UPDATE,
+                Keyword::DELETE,
+                Keyword::TRUNCATE,
+            ])? {
+                Keyword::INSERT => TriggerEvent::Insert,
+                Keyword::UPDATE => {
+                    if self.parse_keyword(Keyword::OF) {
+                        let cols = self.parse_comma_separated(|ident| {
+                            Parser::parse_identifier(ident, false)
+                        })?;
+                        TriggerEvent::Update(cols)
+                    } else {
+                        TriggerEvent::Update(vec![])
+                    }
+                }
+                Keyword::DELETE => TriggerEvent::Delete,
+                Keyword::TRUNCATE => TriggerEvent::Truncate,
+                _ => unreachable!(),
+            },
+        )
+    }
+
+    pub fn parse_trigger_referencing(&mut self) -> Result<Option<TriggerReferencing>, ParserError> {
+        let refer_type = match self.parse_one_of_keywords(&[Keyword::OLD, Keyword::NEW]) {
+            Some(Keyword::OLD) if self.parse_keyword(Keyword::TABLE) => {
+                TriggerReferencingType::OldTable
+            }
+            Some(Keyword::NEW) if self.parse_keyword(Keyword::TABLE) => {
+                TriggerReferencingType::NewTable
+            }
+            _ => {
+                return Ok(None);
+            }
+        };
+
+        let is_as = self.parse_keyword(Keyword::AS);
+        let transition_relation_name = self.parse_object_name(false)?;
+        Ok(Some(TriggerReferencing {
+            refer_type,
+            is_as,
+            transition_relation_name,
+        }))
+    }
+
+    pub fn parse_trigger_exec_body(&mut self) -> Result<TriggerExecBody, ParserError> {
+        Ok(TriggerExecBody {
+            exec_type: match self
+                .expect_one_of_keywords(&[Keyword::FUNCTION, Keyword::PROCEDURE])?
+            {
+                Keyword::FUNCTION => TriggerExecBodyType::Function,
+                Keyword::PROCEDURE => TriggerExecBodyType::Procedure,
+                _ => unreachable!(),
+            },
+            func_desc: self.parse_function_desc()?,
+        })
+    }
+
     pub fn parse_create_macro(
         &mut self,
         or_replace: bool,
@@ -4509,9 +4706,11 @@ impl<'a> Parser<'a> {
             return self.parse_drop_procedure();
         } else if self.parse_keyword(Keyword::SECRET) {
             return self.parse_drop_secret(temporary, persistent);
+        } else if self.parse_keyword(Keyword::TRIGGER) {
+            return self.parse_drop_trigger();
         } else {
             return self.expected(
-                "TABLE, VIEW, INDEX, ROLE, SCHEMA, FUNCTION, PROCEDURE, STAGE or SEQUENCE after DROP",
+                "TABLE, VIEW, INDEX, ROLE, SCHEMA, FUNCTION, PROCEDURE, STAGE, TRIGGER, SECRET or SEQUENCE after DROP",
                 self.peek_token(),
             );
         };
@@ -4550,7 +4749,7 @@ impl<'a> Parser<'a> {
     /// ```
     fn parse_drop_function(&mut self) -> Result<Statement, ParserError> {
         let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
-        let func_desc = self.parse_comma_separated(Parser::parse_drop_function_desc)?;
+        let func_desc = self.parse_comma_separated(Parser::parse_function_desc)?;
         let option = match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) {
             Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade),
             Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict),
@@ -4569,7 +4768,7 @@ impl<'a> Parser<'a> {
     /// ```
     fn parse_drop_procedure(&mut self) -> Result<Statement, ParserError> {
         let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
-        let proc_desc = self.parse_comma_separated(Parser::parse_drop_function_desc)?;
+        let proc_desc = self.parse_comma_separated(Parser::parse_function_desc)?;
         let option = match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) {
             Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade),
             Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict),
@@ -4583,7 +4782,7 @@ impl<'a> Parser<'a> {
         })
     }

-    fn parse_drop_function_desc(&mut self) -> Result<DropFunctionDesc, ParserError> {
+    fn parse_function_desc(&mut self) -> Result<FunctionDesc, ParserError> {
         let name = self.parse_object_name(false)?;

         let args = if self.consume_token(&Token::LParen) {
@@ -4598,7 +4797,7 @@ impl<'a> Parser<'a> {
             None
         };

-        Ok(DropFunctionDesc { name, args })
+        Ok(FunctionDesc { name, args })
     }

     /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details.
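End to end, the new code paths can be exercised through the crate's public entry point (a sketch mirroring the `verified_stmt` round-trip checks in the tests below):

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "CREATE TRIGGER check_insert BEFORE INSERT ON accounts \
               FOR EACH ROW EXECUTE FUNCTION check_account_insert";
    // Parse with the Postgres dialect, then serialize the AST back to SQL.
    let statements = Parser::parse_sql(&PostgreSqlDialect {}, sql).expect("valid SQL");
    assert_eq!(statements.len(), 1);
    assert_eq!(statements[0].to_string(), sql);
}
```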
@@ -5882,11 +6081,7 @@ impl<'a> Parser<'a> { pub fn parse_constraint_characteristics( &mut self, ) -> Result, ParserError> { - let mut cc = ConstraintCharacteristics { - deferrable: None, - initially: None, - enforced: None, - }; + let mut cc = ConstraintCharacteristics::default(); loop { if cc.deferrable.is_none() && self.parse_keywords(&[Keyword::NOT, Keyword::DEFERRABLE]) @@ -7285,6 +7480,7 @@ impl<'a> Parser<'a> { let field_defs = self.parse_click_house_tuple_def()?; Ok(DataType::Tuple(field_defs)) } + Keyword::TRIGGER => Ok(DataType::Trigger), _ => { self.prev_token(); let type_name = self.parse_object_name(false)?; diff --git a/src/test_utils.rs b/src/test_utils.rs index d9100d351..5c05ec996 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -124,6 +124,7 @@ impl TestedDialects { } let only_statement = statements.pop().unwrap(); + if !canonical.is_empty() { assert_eq!(canonical, only_statement.to_string()) } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index f370748d2..2f9fe86c9 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3623,7 +3623,7 @@ fn parse_drop_function() { pg().verified_stmt(sql), Statement::DropFunction { if_exists: true, - func_desc: vec![DropFunctionDesc { + func_desc: vec![FunctionDesc { name: ObjectName(vec![Ident { value: "test_func".to_string(), quote_style: None @@ -3639,7 +3639,7 @@ fn parse_drop_function() { pg().verified_stmt(sql), Statement::DropFunction { if_exists: true, - func_desc: vec![DropFunctionDesc { + func_desc: vec![FunctionDesc { name: ObjectName(vec![Ident { value: "test_func".to_string(), quote_style: None @@ -3664,7 +3664,7 @@ fn parse_drop_function() { Statement::DropFunction { if_exists: true, func_desc: vec![ - DropFunctionDesc { + FunctionDesc { name: ObjectName(vec![Ident { value: "test_func1".to_string(), quote_style: None @@ -3682,7 +3682,7 @@ fn parse_drop_function() { } ]), }, - DropFunctionDesc { + FunctionDesc { name: ObjectName(vec![Ident { value: "test_func2".to_string(), quote_style: None @@ -3713,7 +3713,7 @@ fn parse_drop_procedure() { pg().verified_stmt(sql), Statement::DropProcedure { if_exists: true, - proc_desc: vec![DropFunctionDesc { + proc_desc: vec![FunctionDesc { name: ObjectName(vec![Ident { value: "test_proc".to_string(), quote_style: None @@ -3729,7 +3729,7 @@ fn parse_drop_procedure() { pg().verified_stmt(sql), Statement::DropProcedure { if_exists: true, - proc_desc: vec![DropFunctionDesc { + proc_desc: vec![FunctionDesc { name: ObjectName(vec![Ident { value: "test_proc".to_string(), quote_style: None @@ -3754,7 +3754,7 @@ fn parse_drop_procedure() { Statement::DropProcedure { if_exists: true, proc_desc: vec![ - DropFunctionDesc { + FunctionDesc { name: ObjectName(vec![Ident { value: "test_proc1".to_string(), quote_style: None @@ -3772,7 +3772,7 @@ fn parse_drop_procedure() { } ]), }, - DropFunctionDesc { + FunctionDesc { name: ObjectName(vec![Ident { value: "test_proc2".to_string(), quote_style: None @@ -4455,6 +4455,478 @@ fn test_escaped_string_literal() { } } +#[test] +fn parse_create_simple_before_insert_trigger() { + let sql = "CREATE TRIGGER check_insert BEFORE INSERT ON accounts FOR EACH ROW EXECUTE FUNCTION check_account_insert"; + let expected = Statement::CreateTrigger { + or_replace: false, + is_constraint: false, + name: ObjectName(vec![Ident::new("check_insert")]), + period: TriggerPeriod::Before, + events: vec![TriggerEvent::Insert], + table_name: ObjectName(vec![Ident::new("accounts")]), + referenced_table_name: None, + referencing: 
vec![], + trigger_object: TriggerObject::Row, + include_each: true, + condition: None, + exec_body: TriggerExecBody { + exec_type: TriggerExecBodyType::Function, + func_desc: FunctionDesc { + name: ObjectName(vec![Ident::new("check_account_insert")]), + args: None, + }, + }, + characteristics: None, + }; + + assert_eq!(pg().verified_stmt(sql), expected); +} + +#[test] +fn parse_create_after_update_trigger_with_condition() { + let sql = "CREATE TRIGGER check_update AFTER UPDATE ON accounts FOR EACH ROW WHEN (NEW.balance > 10000) EXECUTE FUNCTION check_account_update"; + let expected = Statement::CreateTrigger { + or_replace: false, + is_constraint: false, + name: ObjectName(vec![Ident::new("check_update")]), + period: TriggerPeriod::After, + events: vec![TriggerEvent::Update(vec![])], + table_name: ObjectName(vec![Ident::new("accounts")]), + referenced_table_name: None, + referencing: vec![], + trigger_object: TriggerObject::Row, + include_each: true, + condition: Some(Expr::Nested(Box::new(Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("NEW"), + Ident::new("balance"), + ])), + op: BinaryOperator::Gt, + right: Box::new(Expr::Value(number("10000"))), + }))), + exec_body: TriggerExecBody { + exec_type: TriggerExecBodyType::Function, + func_desc: FunctionDesc { + name: ObjectName(vec![Ident::new("check_account_update")]), + args: None, + }, + }, + characteristics: None, + }; + + assert_eq!(pg().verified_stmt(sql), expected); +} + +#[test] +fn parse_create_instead_of_delete_trigger() { + let sql = "CREATE TRIGGER check_delete INSTEAD OF DELETE ON accounts FOR EACH ROW EXECUTE FUNCTION check_account_deletes"; + let expected = Statement::CreateTrigger { + or_replace: false, + is_constraint: false, + name: ObjectName(vec![Ident::new("check_delete")]), + period: TriggerPeriod::InsteadOf, + events: vec![TriggerEvent::Delete], + table_name: ObjectName(vec![Ident::new("accounts")]), + referenced_table_name: None, + referencing: vec![], + trigger_object: TriggerObject::Row, + include_each: true, + condition: None, + exec_body: TriggerExecBody { + exec_type: TriggerExecBodyType::Function, + func_desc: FunctionDesc { + name: ObjectName(vec![Ident::new("check_account_deletes")]), + args: None, + }, + }, + characteristics: None, + }; + + assert_eq!(pg().verified_stmt(sql), expected); +} + +#[test] +fn parse_create_trigger_with_multiple_events_and_deferrable() { + let sql = "CREATE CONSTRAINT TRIGGER check_multiple_events BEFORE INSERT OR UPDATE OR DELETE ON accounts DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION check_account_changes"; + let expected = Statement::CreateTrigger { + or_replace: false, + is_constraint: true, + name: ObjectName(vec![Ident::new("check_multiple_events")]), + period: TriggerPeriod::Before, + events: vec![ + TriggerEvent::Insert, + TriggerEvent::Update(vec![]), + TriggerEvent::Delete, + ], + table_name: ObjectName(vec![Ident::new("accounts")]), + referenced_table_name: None, + referencing: vec![], + trigger_object: TriggerObject::Row, + include_each: true, + condition: None, + exec_body: TriggerExecBody { + exec_type: TriggerExecBodyType::Function, + func_desc: FunctionDesc { + name: ObjectName(vec![Ident::new("check_account_changes")]), + args: None, + }, + }, + characteristics: Some(ConstraintCharacteristics { + deferrable: Some(true), + initially: Some(DeferrableInitial::Deferred), + enforced: None, + }), + }; + + assert_eq!(pg().verified_stmt(sql), expected); +} + +#[test] +fn parse_create_trigger_with_referencing() { + let sql = 
"CREATE TRIGGER check_referencing BEFORE INSERT ON accounts REFERENCING NEW TABLE AS new_accounts OLD TABLE AS old_accounts FOR EACH ROW EXECUTE FUNCTION check_account_referencing"; + let expected = Statement::CreateTrigger { + or_replace: false, + is_constraint: false, + name: ObjectName(vec![Ident::new("check_referencing")]), + period: TriggerPeriod::Before, + events: vec![TriggerEvent::Insert], + table_name: ObjectName(vec![Ident::new("accounts")]), + referenced_table_name: None, + referencing: vec![ + TriggerReferencing { + refer_type: TriggerReferencingType::NewTable, + is_as: true, + transition_relation_name: ObjectName(vec![Ident::new("new_accounts")]), + }, + TriggerReferencing { + refer_type: TriggerReferencingType::OldTable, + is_as: true, + transition_relation_name: ObjectName(vec![Ident::new("old_accounts")]), + }, + ], + trigger_object: TriggerObject::Row, + include_each: true, + condition: None, + exec_body: TriggerExecBody { + exec_type: TriggerExecBodyType::Function, + func_desc: FunctionDesc { + name: ObjectName(vec![Ident::new("check_account_referencing")]), + args: None, + }, + }, + characteristics: None, + }; + + assert_eq!(pg().verified_stmt(sql), expected); +} + +#[test] +/// While in the parse_create_trigger test we test the full syntax of the CREATE TRIGGER statement, +/// here we test the invalid cases of the CREATE TRIGGER statement which should cause an appropriate +/// error to be returned. +fn parse_create_trigger_invalid_cases() { + // Test invalid cases for the CREATE TRIGGER statement + let invalid_cases = vec![ + ( + "CREATE TRIGGER check_update BEFORE UPDATE ON accounts FUNCTION check_account_update", + "Expected: FOR, found: FUNCTION" + ), + ( + "CREATE TRIGGER check_update TOMORROW UPDATE ON accounts EXECUTE FUNCTION check_account_update", + "Expected: one of BEFORE or AFTER or INSTEAD, found: TOMORROW" + ), + ( + "CREATE TRIGGER check_update BEFORE SAVE ON accounts EXECUTE FUNCTION check_account_update", + "Expected: one of INSERT or UPDATE or DELETE or TRUNCATE, found: SAVE" + ) + ]; + + for (sql, expected_error) in invalid_cases { + let res = pg().parse_sql_statements(sql); + assert_eq!( + format!("sql parser error: {expected_error}"), + res.unwrap_err().to_string() + ); + } +} + +#[test] +fn parse_drop_trigger() { + for if_exists in [true, false] { + for option in [ + None, + Some(ReferentialAction::Cascade), + Some(ReferentialAction::Restrict), + ] { + let sql = &format!( + "DROP TRIGGER{} check_update ON table_name{}", + if if_exists { " IF EXISTS" } else { "" }, + option + .map(|o| format!(" {}", o)) + .unwrap_or_else(|| "".to_string()) + ); + assert_eq!( + pg().verified_stmt(sql), + Statement::DropTrigger { + if_exists, + trigger_name: ObjectName(vec![Ident::new("check_update")]), + table_name: ObjectName(vec![Ident::new("table_name")]), + option + } + ); + } + } +} + +#[test] +fn parse_drop_trigger_invalid_cases() { + // Test invalid cases for the DROP TRIGGER statement + let invalid_cases = vec![ + ( + "DROP TRIGGER check_update ON table_name CASCADE RESTRICT", + "Expected: end of statement, found: RESTRICT", + ), + ( + "DROP TRIGGER check_update ON table_name CASCADE CASCADE", + "Expected: end of statement, found: CASCADE", + ), + ( + "DROP TRIGGER check_update ON table_name CASCADE CASCADE CASCADE", + "Expected: end of statement, found: CASCADE", + ), + ]; + + for (sql, expected_error) in invalid_cases { + let res = pg().parse_sql_statements(sql); + assert_eq!( + format!("sql parser error: {expected_error}"), + res.unwrap_err().to_string() + 
); + } +} + +#[test] +fn parse_trigger_related_functions() { + // First we define all parts of the trigger definition, + // including the table creation, the function creation, the trigger creation and the trigger drop. + // The following example is taken from the PostgreSQL documentation + + let sql_table_creation = r#" + CREATE TABLE emp ( + empname text, + salary integer, + last_date timestamp, + last_user text + ); + "#; + + let sql_create_function = r#" + CREATE FUNCTION emp_stamp() RETURNS trigger AS $emp_stamp$ + BEGIN + -- Check that empname and salary are given + IF NEW.empname IS NULL THEN + RAISE EXCEPTION 'empname cannot be null'; + END IF; + IF NEW.salary IS NULL THEN + RAISE EXCEPTION '% cannot have null salary', NEW.empname; + END IF; + + -- Who works for us when they must pay for it? + IF NEW.salary < 0 THEN + RAISE EXCEPTION '% cannot have a negative salary', NEW.empname; + END IF; + + -- Remember who changed the payroll when + NEW.last_date := current_timestamp; + NEW.last_user := current_user; + RETURN NEW; + END; + $emp_stamp$ LANGUAGE plpgsql; + "#; + + let sql_create_trigger = r#" + CREATE TRIGGER emp_stamp BEFORE INSERT OR UPDATE ON emp + FOR EACH ROW EXECUTE FUNCTION emp_stamp(); + "#; + + let sql_drop_trigger = r#" + DROP TRIGGER emp_stamp ON emp; + "#; + + // Now we parse the statements and check if they are parsed correctly. + let mut statements = pg() + .parse_sql_statements(&format!( + "{}{}{}{}", + sql_table_creation, sql_create_function, sql_create_trigger, sql_drop_trigger + )) + .unwrap(); + + assert_eq!(statements.len(), 4); + let drop_trigger = statements.pop().unwrap(); + let create_trigger = statements.pop().unwrap(); + let create_function = statements.pop().unwrap(); + let create_table = statements.pop().unwrap(); + + // Check the first statement + let create_table = match create_table { + Statement::CreateTable(create_table) => create_table, + _ => panic!("Expected CreateTable statement"), + }; + + assert_eq!( + create_table, + CreateTable { + or_replace: false, + temporary: false, + external: false, + global: None, + if_not_exists: false, + transient: false, + volatile: false, + name: ObjectName(vec![Ident::new("emp")]), + columns: vec![ + ColumnDef { + name: "empname".into(), + data_type: DataType::Text, + collation: None, + options: vec![], + }, + ColumnDef { + name: "salary".into(), + data_type: DataType::Integer(None), + collation: None, + options: vec![], + }, + ColumnDef { + name: "last_date".into(), + data_type: DataType::Timestamp(None, TimezoneInfo::None), + collation: None, + options: vec![], + }, + ColumnDef { + name: "last_user".into(), + data_type: DataType::Text, + collation: None, + options: vec![], + }, + ], + constraints: vec![], + hive_distribution: HiveDistributionStyle::NONE, + hive_formats: Some(HiveFormat { + row_format: None, + serde_properties: None, + storage: None, + location: None + }), + table_properties: vec![], + with_options: vec![], + file_format: None, + location: None, + query: None, + without_rowid: false, + like: None, + clone: None, + engine: None, + comment: None, + auto_increment_offset: None, + default_charset: None, + collation: None, + on_commit: None, + on_cluster: None, + primary_key: None, + order_by: None, + partition_by: None, + cluster_by: None, + options: None, + strict: false, + copy_grants: false, + enable_schema_evolution: None, + change_tracking: None, + data_retention_time_in_days: None, + max_data_extension_time_in_days: None, + default_ddl_collation: None, + with_aggregation_policy: None, + 
with_row_access_policy: None, + with_tags: None, + } + ); + + // Check the second statement + + assert_eq!( + create_function, + Statement::CreateFunction { + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName(vec![Ident::new("emp_stamp")]), + args: None, + return_type: Some(DataType::Trigger), + function_body: Some( + CreateFunctionBody::AsBeforeOptions( + Expr::Value( + Value::DollarQuotedString( + DollarQuotedString { + value: "\n BEGIN\n -- Check that empname and salary are given\n IF NEW.empname IS NULL THEN\n RAISE EXCEPTION 'empname cannot be null';\n END IF;\n IF NEW.salary IS NULL THEN\n RAISE EXCEPTION '% cannot have null salary', NEW.empname;\n END IF;\n \n -- Who works for us when they must pay for it?\n IF NEW.salary < 0 THEN\n RAISE EXCEPTION '% cannot have a negative salary', NEW.empname;\n END IF;\n \n -- Remember who changed the payroll when\n NEW.last_date := current_timestamp;\n NEW.last_user := current_user;\n RETURN NEW;\n END;\n ".to_owned(), + tag: Some( + "emp_stamp".to_owned(), + ), + }, + ), + ), + ), + ), + behavior: None, + called_on_null: None, + parallel: None, + using: None, + language: Some(Ident::new("plpgsql")), + determinism_specifier: None, + options: None, + remote_connection: None + } + ); + + // Check the third statement + + assert_eq!( + create_trigger, + Statement::CreateTrigger { + or_replace: false, + is_constraint: false, + name: ObjectName(vec![Ident::new("emp_stamp")]), + period: TriggerPeriod::Before, + events: vec![TriggerEvent::Insert, TriggerEvent::Update(vec![])], + table_name: ObjectName(vec![Ident::new("emp")]), + referenced_table_name: None, + referencing: vec![], + trigger_object: TriggerObject::Row, + include_each: true, + condition: None, + exec_body: TriggerExecBody { + exec_type: TriggerExecBodyType::Function, + func_desc: FunctionDesc { + name: ObjectName(vec![Ident::new("emp_stamp")]), + args: None, + } + }, + characteristics: None + } + ); + + // Check the fourth statement + assert_eq!( + drop_trigger, + Statement::DropTrigger { + if_exists: false, + trigger_name: ObjectName(vec![Ident::new("emp_stamp")]), + table_name: ObjectName(vec![Ident::new("emp")]), + option: None + } + ); +} + #[test] fn test_unicode_string_literal() { let pairs = [ From c2f46ae07b8cdcfd72f5edef0274f029b1500de6 Mon Sep 17 00:00:00 2001 From: Seve Martinez <20816697+seve-martinez@users.noreply.github.com> Date: Wed, 14 Aug 2024 06:11:40 -0700 Subject: [PATCH 30/57] adding support for scale in CEIL and FLOOR functions (#1377) --- src/ast/mod.rs | 48 +++++++++++++++++++++-------- src/parser/mod.rs | 20 +++++++++--- tests/sqlparser_common.rs | 64 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 113 insertions(+), 19 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ae0522ccc..e3e9a5371 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -500,6 +500,24 @@ pub enum ExtractSyntax { Comma, } +/// The syntax used in a CEIL or FLOOR expression. +/// +/// The `CEIL/FLOOR( TO