From 11899fd0cb2b8347496daede2b33264bbef50929 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mehmet=20Emin=20KARAKA=C5=9E?= Date: Fri, 8 Mar 2024 23:03:49 +0300 Subject: [PATCH 01/42] Support `row_alias` and `col_aliases` in `INSERT` statement for mysql and generic dialects (#1136) --- src/ast/mod.rs | 21 +++++++ src/parser/mod.rs | 14 +++++ tests/sqlparser_mysql.rs | 108 +++++++++++++++++++++++++++++++++++- tests/sqlparser_postgres.rs | 9 ++- 4 files changed, 148 insertions(+), 4 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d8688c1ab..f2cfc974c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1759,6 +1759,8 @@ pub enum Statement { replace_into: bool, /// Only for mysql priority: Option, + /// Only for mysql + insert_alias: Option, }, /// ```sql /// INSTALL @@ -2773,6 +2775,7 @@ impl fmt::Display for Statement { returning, replace_into, priority, + insert_alias, } => { let table_name = if let Some(alias) = table_alias { format!("{table_name} AS {alias}") @@ -2822,6 +2825,16 @@ impl fmt::Display for Statement { write!(f, "DEFAULT VALUES")?; } + if let Some(insert_alias) = insert_alias { + write!(f, " AS {0}", insert_alias.row_alias)?; + + if let Some(col_aliases) = &insert_alias.col_aliases { + if !col_aliases.is_empty() { + write!(f, " ({})", display_comma_separated(col_aliases))?; + } + } + } + if let Some(on) = on { write!(f, "{on}")?; } @@ -4194,6 +4207,14 @@ pub enum OnInsert { OnConflict(OnConflict), } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct InsertAliases { + pub row_alias: ObjectName, + pub col_aliases: Option>, +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6d7ac3604..145e19007 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8419,6 +8419,19 @@ impl<'a> Parser<'a> { (columns, partitioned, after_columns, source) }; + let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect) + && self.parse_keyword(Keyword::AS) + { + let row_alias = self.parse_object_name(false)?; + let col_aliases = Some(self.parse_parenthesized_column_list(Optional, false)?); + Some(InsertAliases { + row_alias, + col_aliases, + }) + } else { + None + }; + let on = if self.parse_keyword(Keyword::ON) { if self.parse_keyword(Keyword::CONFLICT) { let conflict_target = @@ -8488,6 +8501,7 @@ impl<'a> Parser<'a> { returning, replace_into, priority, + insert_alias, }) } } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 06982be49..af2a2184a 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -19,7 +19,7 @@ use matches::assert_matches; use sqlparser::ast::MysqlInsertPriority::{Delayed, HighPriority, LowPriority}; use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, MySqlDialect}; -use sqlparser::parser::ParserOptions; +use sqlparser::parser::{ParserError, ParserOptions}; use sqlparser::tokenizer::Token; use test_utils::*; @@ -1330,6 +1330,112 @@ fn parse_priority_insert() { } } +#[test] +fn parse_insert_as() { + let sql = r"INSERT INTO `table` (`date`) VALUES ('2024-01-01') AS `alias`"; + match mysql_and_generic().verified_stmt(sql) { + Statement::Insert { + table_name, + columns, + source, + insert_alias, + .. 
+ } => { + assert_eq!( + ObjectName(vec![Ident::with_quote('`', "table")]), + table_name + ); + assert_eq!(vec![Ident::with_quote('`', "date")], columns); + let insert_alias = insert_alias.unwrap(); + + assert_eq!( + ObjectName(vec![Ident::with_quote('`', "alias")]), + insert_alias.row_alias + ); + assert_eq!(Some(vec![]), insert_alias.col_aliases); + assert_eq!( + Some(Box::new(Query { + with: None, + body: Box::new(SetExpr::Values(Values { + explicit_row: false, + rows: vec![vec![Expr::Value(Value::SingleQuotedString( + "2024-01-01".to_string() + ))]] + })), + order_by: vec![], + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + })), + source + ); + } + _ => unreachable!(), + } + + let sql = r"INSERT INTO `table` (`date`) VALUES ('2024-01-01') AS `alias` ()"; + assert!(matches!( + mysql_and_generic().parse_sql_statements(sql), + Err(ParserError::ParserError(_)) + )); + + let sql = r"INSERT INTO `table` (`id`, `date`) VALUES (1, '2024-01-01') AS `alias` (`mek_id`, `mek_date`)"; + match mysql_and_generic().verified_stmt(sql) { + Statement::Insert { + table_name, + columns, + source, + insert_alias, + .. + } => { + assert_eq!( + ObjectName(vec![Ident::with_quote('`', "table")]), + table_name + ); + assert_eq!( + vec![Ident::with_quote('`', "id"), Ident::with_quote('`', "date")], + columns + ); + let insert_alias = insert_alias.unwrap(); + assert_eq!( + ObjectName(vec![Ident::with_quote('`', "alias")]), + insert_alias.row_alias + ); + assert_eq!( + Some(vec![ + Ident::with_quote('`', "mek_id"), + Ident::with_quote('`', "mek_date") + ]), + insert_alias.col_aliases + ); + assert_eq!( + Some(Box::new(Query { + with: None, + body: Box::new(SetExpr::Values(Values { + explicit_row: false, + rows: vec![vec![ + Expr::Value(number("1")), + Expr::Value(Value::SingleQuotedString("2024-01-01".to_string())) + ]] + })), + order_by: vec![], + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + })), + source + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_replace_insert() { let sql = r"REPLACE DELAYED INTO tasks (title, priority) VALUES ('Test Some Inserts', 1)"; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 45ec277e9..9de4b981f 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3764,7 +3764,8 @@ fn test_simple_postgres_insert_with_alias() { on: None, returning: None, replace_into: false, - priority: None + priority: None, + insert_alias: None } ) } @@ -3830,7 +3831,8 @@ fn test_simple_postgres_insert_with_alias() { on: None, returning: None, replace_into: false, - priority: None + priority: None, + insert_alias: None } ) } @@ -3892,7 +3894,8 @@ fn test_simple_insert_with_quoted_alias() { on: None, returning: None, replace_into: false, - priority: None + priority: None, + insert_alias: None, } ) } From 929c646bba9cdef6bf774b53bae0c19e0798ef2d Mon Sep 17 00:00:00 2001 From: Michiel De Backker Date: Mon, 11 Mar 2024 21:27:25 +0100 Subject: [PATCH 02/42] Add identifier quote style to `Dialect` trait (#1170) --- src/dialect/mod.rs | 23 +++++++++++++++++++++++ src/dialect/mysql.rs | 4 ++++ src/dialect/postgresql.rs | 4 ++++ src/dialect/sqlite.rs | 4 ++++ 4 files changed, 35 insertions(+) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index c53670263..682e5924c 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -108,6 +108,10 @@ pub trait Dialect: Debug + Any { fn is_delimited_identifier_start(&self, ch: char) -> bool { ch == 
'"' || ch == '`' } + /// Return the character used to quote identifiers. + fn identifier_quote_style(&self, _identifier: &str) -> Option { + None + } /// Determine if quoted characters are proper for identifier fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable>) -> bool { true @@ -262,6 +266,21 @@ mod tests { dialect_from_str(v).unwrap() } + #[test] + fn identifier_quote_style() { + let tests: Vec<(&dyn Dialect, &str, Option)> = vec![ + (&GenericDialect {}, "id", None), + (&SQLiteDialect {}, "id", Some('`')), + (&PostgreSqlDialect {}, "id", Some('"')), + ]; + + for (dialect, ident, expected) in tests { + let actual = dialect.identifier_quote_style(ident); + + assert_eq!(actual, expected); + } + } + #[test] fn parse_with_wrapped_dialect() { /// Wrapper for a dialect. In a real-world example, this wrapper @@ -283,6 +302,10 @@ mod tests { self.0.is_delimited_identifier_start(ch) } + fn identifier_quote_style(&self, identifier: &str) -> Option { + self.0.identifier_quote_style(identifier) + } + fn is_proper_identifier_inside_quotes( &self, chars: std::iter::Peekable>, diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 95b358a7e..d0dbe923c 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -44,6 +44,10 @@ impl Dialect for MySqlDialect { ch == '`' } + fn identifier_quote_style(&self, _identifier: &str) -> Option { + Some('`') + } + fn parse_infix( &self, parser: &mut crate::parser::Parser, diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index cbd150511..f179111e0 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -21,6 +21,10 @@ use crate::tokenizer::Token; pub struct PostgreSqlDialect {} impl Dialect for PostgreSqlDialect { + fn identifier_quote_style(&self, _identifier: &str) -> Option { + Some('"') + } + fn is_identifier_start(&self, ch: char) -> bool { // See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS // We don't yet support identifiers beginning with "letters with diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 622fddee6..daad6a159 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -32,6 +32,10 @@ impl Dialect for SQLiteDialect { ch == '`' || ch == '"' || ch == '[' } + fn identifier_quote_style(&self, _identifier: &str) -> Option { + Some('`') + } + fn is_identifier_start(&self, ch: char) -> bool { // See https://www.sqlite.org/draft/tokenreq.html ch.is_ascii_lowercase() From 6b03a259aac09d7c44fbfd7f2588f31392b730d8 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Wed, 13 Mar 2024 16:08:27 +0100 Subject: [PATCH 03/42] Parse `SUBSTRING` `FROM` syntax in all dialects, reflect change in the AST (#1173) --- src/ast/mod.rs | 9 ++++-- src/dialect/mod.rs | 8 ------ src/dialect/mssql.rs | 4 --- src/parser/mod.rs | 58 +++++++++++++-------------------------- tests/sqlparser_common.rs | 45 ++++-------------------------- tests/sqlparser_mysql.rs | 4 +-- tests/sqlparser_sqlite.rs | 12 ++++++++ 7 files changed, 46 insertions(+), 94 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f2cfc974c..3e8354e15 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -559,13 +559,18 @@ pub enum Expr { /// ```sql /// SUBSTRING( [FROM ] [FOR ]) /// ``` + /// or + /// ```sql + /// SUBSTRING(, , ) + /// ``` Substring { expr: Box, substring_from: Option>, substring_for: Option>, - // Some dialects use `SUBSTRING(expr [FROM start] [FOR len])` syntax while others omit FROM, - // FOR keywords (e.g. Microsoft SQL Server). This flags is used for formatting. 
+        /// false if the expression is represented using the `SUBSTRING(expr [FROM start] [FOR len])` syntax
+        /// true if the expression is represented using the `SUBSTRING(expr, start, len)` syntax
+        /// This flag is used for formatting.
         special: bool,
     },
     /// ```sql
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index 682e5924c..2873cca2c 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -135,10 +135,6 @@ pub trait Dialect: Debug + Any {
     fn supports_group_by_expr(&self) -> bool {
         false
     }
-    /// Returns true if the dialect supports `SUBSTRING(expr [FROM start] [FOR len])` expressions
-    fn supports_substring_from_for_expr(&self) -> bool {
-        true
-    }
     /// Returns true if the dialect supports `(NOT) IN ()` expressions
     fn supports_in_empty_list(&self) -> bool {
         false
     }
@@ -325,10 +321,6 @@ mod tests {
            self.0.supports_group_by_expr()
        }

-        fn supports_substring_from_for_expr(&self) -> bool {
-            self.0.supports_substring_from_for_expr()
-        }
-
        fn supports_in_empty_list(&self) -> bool {
            self.0.supports_in_empty_list()
        }
diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs
index c7bf11864..6362a52b8 100644
--- a/src/dialect/mssql.rs
+++ b/src/dialect/mssql.rs
@@ -40,8 +40,4 @@ impl Dialect for MsSqlDialect {
     fn convert_type_before_value(&self) -> bool {
         true
     }
-
-    fn supports_substring_from_for_expr(&self) -> bool {
-        false
-    }
 }
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 145e19007..a7190563f 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1525,47 +1525,27 @@ impl<'a> Parser<'a> {
     }

     pub fn parse_substring_expr(&mut self) -> Result<Expr, ParserError> {
-        if self.dialect.supports_substring_from_for_expr() {
-            // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3])
-            self.expect_token(&Token::LParen)?;
-            let expr = self.parse_expr()?;
-            let mut from_expr = None;
-            if self.parse_keyword(Keyword::FROM) || self.consume_token(&Token::Comma) {
-                from_expr = Some(self.parse_expr()?);
-            }
-
-            let mut to_expr = None;
-            if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) {
-                to_expr = Some(self.parse_expr()?);
-            }
-            self.expect_token(&Token::RParen)?;
-
-            Ok(Expr::Substring {
-                expr: Box::new(expr),
-                substring_from: from_expr.map(Box::new),
-                substring_for: to_expr.map(Box::new),
-                special: false,
-            })
-        } else {
-            // PARSE SUBSTRING(EXPR, start, length)
-            self.expect_token(&Token::LParen)?;
-            let expr = self.parse_expr()?;
-
-            self.expect_token(&Token::Comma)?;
-            let from_expr = Some(self.parse_expr()?);
-
-            self.expect_token(&Token::Comma)?;
-            let to_expr = Some(self.parse_expr()?);
-
-            self.expect_token(&Token::RParen)?;
+        // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3])
+        self.expect_token(&Token::LParen)?;
+        let expr = self.parse_expr()?;
+        let mut from_expr = None;
+        let special = self.consume_token(&Token::Comma);
+        if special || self.parse_keyword(Keyword::FROM) {
+            from_expr = Some(self.parse_expr()?);
+        }

-            Ok(Expr::Substring {
-                expr: Box::new(expr),
-                substring_from: from_expr.map(Box::new),
-                substring_for: to_expr.map(Box::new),
-                special: true,
-            })
+        let mut to_expr = None;
+        if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) {
+            to_expr = Some(self.parse_expr()?);
         }
+        self.expect_token(&Token::RParen)?;
+
+        Ok(Expr::Substring {
+            expr: Box::new(expr),
+            substring_from: from_expr.map(Box::new),
+            substring_for: to_expr.map(Box::new),
+            special,
+        })
     }

     pub fn parse_overlay_expr(&mut self) -> Result<Expr, ParserError> {
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index f81456849..62d5f2962 100644
--- a/tests/sqlparser_common.rs
+++ 
b/tests/sqlparser_common.rs @@ -5761,45 +5761,12 @@ fn parse_scalar_subqueries() { #[test] fn parse_substring() { - let from_for_supported_dialects = TestedDialects { - dialects: vec![ - Box::new(GenericDialect {}), - Box::new(PostgreSqlDialect {}), - Box::new(AnsiDialect {}), - Box::new(SnowflakeDialect {}), - Box::new(HiveDialect {}), - Box::new(RedshiftSqlDialect {}), - Box::new(MySqlDialect {}), - Box::new(BigQueryDialect {}), - Box::new(SQLiteDialect {}), - Box::new(DuckDbDialect {}), - ], - options: None, - }; - - let from_for_unsupported_dialects = TestedDialects { - dialects: vec![Box::new(MsSqlDialect {})], - options: None, - }; - - from_for_supported_dialects - .one_statement_parses_to("SELECT SUBSTRING('1')", "SELECT SUBSTRING('1')"); - - from_for_supported_dialects.one_statement_parses_to( - "SELECT SUBSTRING('1' FROM 1)", - "SELECT SUBSTRING('1' FROM 1)", - ); - - from_for_supported_dialects.one_statement_parses_to( - "SELECT SUBSTRING('1' FROM 1 FOR 3)", - "SELECT SUBSTRING('1' FROM 1 FOR 3)", - ); - - from_for_unsupported_dialects - .one_statement_parses_to("SELECT SUBSTRING('1', 1, 3)", "SELECT SUBSTRING('1', 1, 3)"); - - from_for_supported_dialects - .one_statement_parses_to("SELECT SUBSTRING('1' FOR 3)", "SELECT SUBSTRING('1' FOR 3)"); + verified_stmt("SELECT SUBSTRING('1')"); + verified_stmt("SELECT SUBSTRING('1' FROM 1)"); + verified_stmt("SELECT SUBSTRING('1' FROM 1 FOR 3)"); + verified_stmt("SELECT SUBSTRING('1', 1, 3)"); + verified_stmt("SELECT SUBSTRING('1', 1)"); + verified_stmt("SELECT SUBSTRING('1' FOR 3)"); } #[test] diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index af2a2184a..8ffb78ae2 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1911,7 +1911,7 @@ fn parse_substring_in_select() { let sql = "SELECT DISTINCT SUBSTRING(description, 0, 1) FROM test"; match mysql().one_statement_parses_to( sql, - "SELECT DISTINCT SUBSTRING(description FROM 0 FOR 1) FROM test", + "SELECT DISTINCT SUBSTRING(description, 0, 1) FROM test", ) { Statement::Query(query) => { assert_eq!( @@ -1927,7 +1927,7 @@ fn parse_substring_in_select() { })), substring_from: Some(Box::new(Expr::Value(number("0")))), substring_for: Some(Box::new(Expr::Value(number("1")))), - special: false, + special: true, })], into: None, from: vec![TableWithJoins { diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 6c8b507de..3452355a8 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -413,6 +413,18 @@ fn parse_single_quoted_identified() { sqlite().verified_only_select("SELECT 't'.*, t.'x' FROM 't'"); // TODO: add support for select 't'.x } + +#[test] +fn parse_substring() { + // SQLite supports the SUBSTRING function since v3.34, but does not support the SQL standard + // SUBSTRING(expr FROM start FOR length) syntax. + // https://www.sqlite.org/lang_corefunc.html#substr + sqlite().verified_only_select("SELECT SUBSTRING('SQLITE', 3, 4)"); + sqlite().verified_only_select("SELECT SUBSTR('SQLITE', 3, 4)"); + sqlite().verified_only_select("SELECT SUBSTRING('SQLITE', 3)"); + sqlite().verified_only_select("SELECT SUBSTR('SQLITE', 3)"); +} + #[test] fn parse_window_function_with_filter() { for func_name in [ From 44727891713114b72546facf21a335606380845a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Milenkovi=C4=87?= Date: Sun, 24 Mar 2024 18:20:15 +0000 Subject: [PATCH 04/42] Add support for $$ in generic dialect ... 
(#1185) --- src/parser/mod.rs | 3 ++- tests/sqlparser_postgres.rs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a7190563f..674d0692b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5954,7 +5954,8 @@ impl<'a> Parser<'a> { pub fn parse_function_definition(&mut self) -> Result { let peek_token = self.peek_token(); match peek_token.token { - Token::DollarQuotedString(value) if dialect_of!(self is PostgreSqlDialect) => { + Token::DollarQuotedString(value) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => + { self.next_token(); Ok(FunctionDefinition::DoubleDollarDef(value.value)) } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 9de4b981f..4a92cd45c 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3305,7 +3305,7 @@ fn parse_create_function() { let sql = "CREATE OR REPLACE FUNCTION add(a INTEGER, IN b INTEGER = 1) RETURNS INTEGER LANGUAGE SQL IMMUTABLE RETURN a + b"; assert_eq!( - pg().verified_stmt(sql), + pg_and_generic().verified_stmt(sql), Statement::CreateFunction { or_replace: true, temporary: false, From e747c9c2af08f4ea12e8d1692adf95998209e2a1 Mon Sep 17 00:00:00 2001 From: gstvg <28798827+gstvg@users.noreply.github.com> Date: Fri, 29 Mar 2024 10:39:52 -0300 Subject: [PATCH 05/42] Add support for DuckDB struct literal syntax (#1194) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 28 +++++++++++++ src/parser/mod.rs | 43 +++++++++++++++++++ tests/sqlparser_duckdb.rs | 87 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 158 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 3e8354e15..7818dacd3 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -347,6 +347,23 @@ impl fmt::Display for StructField { } } +/// A dictionary field within a dictionary. +/// +/// [duckdb]: https://duckdb.org/docs/sql/data_types/struct#creating-structs +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct DictionaryField { + pub key: Ident, + pub value: Box, +} + +impl fmt::Display for DictionaryField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}: {}", self.key, self.value) + } +} + /// Options for `CAST` / `TRY_CAST` /// BigQuery: #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -687,6 +704,14 @@ pub enum Expr { expr: Box, name: Ident, }, + /// `DuckDB` specific `Struct` literal expression [1] + /// + /// Syntax: + /// ```sql + /// syntax: {'field_name': expr1[, ... ]} + /// ``` + /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + Dictionary(Vec), /// An array index expression e.g. 
`(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]` ArrayIndex { obj: Box, @@ -1146,6 +1171,9 @@ impl fmt::Display for Expr { Expr::Named { expr, name } => { write!(f, "{} AS {}", expr, name) } + Expr::Dictionary(fields) => { + write!(f, "{{{}}}", display_comma_separated(fields)) + } Expr::ArrayIndex { obj, indexes } => { write!(f, "{obj}")?; for i in indexes { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 674d0692b..2a5e9567a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1117,6 +1117,10 @@ impl<'a> Parser<'a> { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) } + Token::LBrace if dialect_of!(self is DuckDbDialect | GenericDialect) => { + self.prev_token(); + self.parse_duckdb_struct_literal() + } _ => self.expected("an expression:", next_token), }?; @@ -2127,6 +2131,45 @@ impl<'a> Parser<'a> { )) } + /// DuckDB specific: Parse a duckdb dictionary [1] + /// + /// Syntax: + /// + /// ```sql + /// {'field_name': expr1[, ... ]} + /// ``` + /// + /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + fn parse_duckdb_struct_literal(&mut self) -> Result { + self.expect_token(&Token::LBrace)?; + + let fields = self.parse_comma_separated(Self::parse_duckdb_dictionary_field)?; + + self.expect_token(&Token::RBrace)?; + + Ok(Expr::Dictionary(fields)) + } + + /// Parse a field for a duckdb dictionary [1] + /// Syntax + /// ```sql + /// 'name': expr + /// ``` + /// + /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + fn parse_duckdb_dictionary_field(&mut self) -> Result { + let key = self.parse_identifier(false)?; + + self.expect_token(&Token::Colon)?; + + let expr = self.parse_expr()?; + + Ok(DictionaryField { + key, + value: Box::new(expr), + }) + } + /// For nested types that use the angle bracket syntax, this matches either /// `>`, `>>` or nothing depending on which variant is expected (specified by the previously /// matched `trailing_bracket` argument). It returns whether there is a trailing diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 45ae01bf0..a29d40084 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -246,3 +246,90 @@ fn test_duckdb_load_extension() { stmt ); } + +#[test] +fn test_duckdb_struct_literal() { + //struct literal syntax https://duckdb.org/docs/sql/data_types/struct#creating-structs + //syntax: {'field_name': expr1[, ... 
]} + let sql = "SELECT {'a': 1, 'b': 2, 'c': 3}, [{'a': 'abc'}], {'a': 1, 'b': [t.str_col]}, {'a': 1, 'b': 'abc'}, {'abc': str_col}, {'a': {'aa': 1}}"; + let select = duckdb_and_generic().verified_only_select(sql); + assert_eq!(6, select.projection.len()); + assert_eq!( + &Expr::Dictionary(vec![ + DictionaryField { + key: Ident::with_quote('\'', "a"), + value: Box::new(Expr::Value(number("1"))), + }, + DictionaryField { + key: Ident::with_quote('\'', "b"), + value: Box::new(Expr::Value(number("2"))), + }, + DictionaryField { + key: Ident::with_quote('\'', "c"), + value: Box::new(Expr::Value(number("3"))), + }, + ],), + expr_from_projection(&select.projection[0]) + ); + + assert_eq!( + &Expr::Array(Array { + elem: vec![Expr::Dictionary(vec![DictionaryField { + key: Ident::with_quote('\'', "a"), + value: Box::new(Expr::Value(Value::SingleQuotedString("abc".to_string()))), + },],)], + named: false + }), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Dictionary(vec![ + DictionaryField { + key: Ident::with_quote('\'', "a"), + value: Box::new(Expr::Value(number("1"))), + }, + DictionaryField { + key: Ident::with_quote('\'', "b"), + value: Box::new(Expr::Array(Array { + elem: vec![Expr::CompoundIdentifier(vec![ + Ident::from("t"), + Ident::from("str_col") + ])], + named: false + })), + }, + ],), + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Dictionary(vec![ + DictionaryField { + key: Ident::with_quote('\'', "a"), + value: Expr::Value(number("1")).into(), + }, + DictionaryField { + key: Ident::with_quote('\'', "b"), + value: Expr::Value(Value::SingleQuotedString("abc".to_string())).into(), + }, + ],), + expr_from_projection(&select.projection[3]) + ); + assert_eq!( + &Expr::Dictionary(vec![DictionaryField { + key: Ident::with_quote('\'', "abc"), + value: Expr::Identifier(Ident::from("str_col")).into(), + }],), + expr_from_projection(&select.projection[4]) + ); + assert_eq!( + &Expr::Dictionary(vec![DictionaryField { + key: Ident::with_quote('\'', "a"), + value: Expr::Dictionary(vec![DictionaryField { + key: Ident::with_quote('\'', "aa"), + value: Expr::Value(number("1")).into(), + }],) + .into(), + }],), + expr_from_projection(&select.projection[5]) + ); +} From 14b33ac493b9c2214487f3d389d4287e038f8fc0 Mon Sep 17 00:00:00 2001 From: gstvg <28798827+gstvg@users.noreply.github.com> Date: Sat, 6 Apr 2024 13:46:36 -0300 Subject: [PATCH 06/42] Add support for DuckDB functions named arguments with assignment operator (#1195) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 3 +++ src/parser/mod.rs | 17 +++++++++++++++-- src/tokenizer.rs | 8 ++++---- tests/sqlparser_duckdb.rs | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7818dacd3..a378b58b1 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4584,6 +4584,8 @@ pub enum FunctionArgOperator { Equals, /// function(arg1 => value1) RightArrow, + /// function(arg1 := value1) + Assignment, } impl fmt::Display for FunctionArgOperator { @@ -4591,6 +4593,7 @@ impl fmt::Display for FunctionArgOperator { match self { FunctionArgOperator::Equals => f.write_str("="), FunctionArgOperator::RightArrow => f.write_str("=>"), + FunctionArgOperator::Assignment => f.write_str(":="), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2a5e9567a..a3d7a7cfc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3485,7 +3485,7 @@ impl<'a> Parser<'a> { let name = self.parse_identifier(false)?; let default_expr = - if 
self.consume_token(&Token::DuckAssignment) || self.consume_token(&Token::RArrow) { + if self.consume_token(&Token::Assignment) || self.consume_token(&Token::RArrow) { Some(self.parse_expr()?) } else { None @@ -4183,7 +4183,7 @@ impl<'a> Parser<'a> { self.next_token(); // Skip `DEFAULT` Some(DeclareAssignment::Default(Box::new(self.parse_expr()?))) } - Token::DuckAssignment => { + Token::Assignment => { self.next_token(); // Skip `:=` Some(DeclareAssignment::DuckAssignment(Box::new( self.parse_expr()?, @@ -8602,6 +8602,19 @@ impl<'a> Parser<'a> { arg, operator: FunctionArgOperator::Equals, }) + } else if dialect_of!(self is DuckDbDialect | GenericDialect) + && self.peek_nth_token(1) == Token::Assignment + { + let name = self.parse_identifier(false)?; + + self.expect_token(&Token::Assignment)?; + let arg = self.parse_expr()?.into(); + + Ok(FunctionArg::Named { + name, + arg, + operator: FunctionArgOperator::Assignment, + }) } else { Ok(FunctionArg::Unnamed(self.parse_wildcard_expr()?.into())) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index a1a2eae2d..e31fccca9 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -117,8 +117,8 @@ pub enum Token { Colon, /// DoubleColon `::` (used for casting in PostgreSQL) DoubleColon, - /// Assignment `:=` (used for keyword argument in DuckDB macros) - DuckAssignment, + /// Assignment `:=` (used for keyword argument in DuckDB macros and some functions, and for variable declarations in DuckDB and Snowflake) + Assignment, /// SemiColon `;` used as separator for COPY and payload SemiColon, /// Backslash `\` used in terminating the COPY payload with `\.` @@ -239,7 +239,7 @@ impl fmt::Display for Token { Token::Period => f.write_str("."), Token::Colon => f.write_str(":"), Token::DoubleColon => f.write_str("::"), - Token::DuckAssignment => f.write_str(":="), + Token::Assignment => f.write_str(":="), Token::SemiColon => f.write_str(";"), Token::Backslash => f.write_str("\\"), Token::LBracket => f.write_str("["), @@ -959,7 +959,7 @@ impl<'a> Tokenizer<'a> { chars.next(); match chars.peek() { Some(':') => self.consume_and_return(chars, Token::DoubleColon), - Some('=') => self.consume_and_return(chars, Token::DuckAssignment), + Some('=') => self.consume_and_return(chars, Token::Assignment), _ => Ok(Some(Token::Colon)), } } diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index a29d40084..e41109d95 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -333,3 +333,37 @@ fn test_duckdb_struct_literal() { expr_from_projection(&select.projection[5]) ); } + +#[test] +fn test_duckdb_named_argument_function_with_assignment_operator() { + let sql = "SELECT FUN(a := '1', b := '2') FROM foo"; + let select = duckdb_and_generic().verified_only_select(sql); + assert_eq!( + &Expr::Function(Function { + name: ObjectName(vec![Ident::new("FUN")]), + args: vec![ + FunctionArg::Named { + name: Ident::new("a"), + arg: FunctionArgExpr::Expr(Expr::Value(Value::SingleQuotedString( + "1".to_owned() + ))), + operator: FunctionArgOperator::Assignment + }, + FunctionArg::Named { + name: Ident::new("b"), + arg: FunctionArgExpr::Expr(Expr::Value(Value::SingleQuotedString( + "2".to_owned() + ))), + operator: FunctionArgOperator::Assignment + }, + ], + null_treatment: None, + filter: None, + over: None, + distinct: false, + special: false, + order_by: vec![], + }), + expr_from_projection(only(&select.projection)) + ); +} From 2bf93a470c759b0ec5898ce12bff6fa266c05c0c Mon Sep 17 00:00:00 2001 From: Daniel Imfeld Date: Sat, 6 Apr 2024 07:03:00 -1000 
Subject: [PATCH 07/42] Support `PARALLEL ... and for `..ON NULL INPUT ...` to `CREATE FUNCTION` (#1202) --- src/ast/mod.rs | 52 ++++++++++++++++++++++++++++++++++++- src/keywords.rs | 5 ++++ src/parser/mod.rs | 40 ++++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 48 ++++++++++++++++++++++++++++++++-- 4 files changed, 142 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a378b58b1..9df0b5deb 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -5683,6 +5683,46 @@ impl fmt::Display for FunctionBehavior { } } +/// These attributes describe the behavior of the function when called with a null argument. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FunctionCalledOnNull { + CalledOnNullInput, + ReturnsNullOnNullInput, + Strict, +} + +impl fmt::Display for FunctionCalledOnNull { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FunctionCalledOnNull::CalledOnNullInput => write!(f, "CALLED ON NULL INPUT"), + FunctionCalledOnNull::ReturnsNullOnNullInput => write!(f, "RETURNS NULL ON NULL INPUT"), + FunctionCalledOnNull::Strict => write!(f, "STRICT"), + } + } +} + +/// If it is safe for PostgreSQL to call the function from multiple threads at once +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FunctionParallel { + Unsafe, + Restricted, + Safe, +} + +impl fmt::Display for FunctionParallel { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FunctionParallel::Unsafe => write!(f, "PARALLEL UNSAFE"), + FunctionParallel::Restricted => write!(f, "PARALLEL RESTRICTED"), + FunctionParallel::Safe => write!(f, "PARALLEL SAFE"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -5703,7 +5743,7 @@ impl fmt::Display for FunctionDefinition { /// Postgres specific feature. /// -/// See [Postgresdocs](https://www.postgresql.org/docs/15/sql-createfunction.html) +/// See [Postgres docs](https://www.postgresql.org/docs/15/sql-createfunction.html) /// for more details #[derive(Debug, Default, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -5713,6 +5753,10 @@ pub struct CreateFunctionBody { pub language: Option, /// IMMUTABLE | STABLE | VOLATILE pub behavior: Option, + /// CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT + pub called_on_null: Option, + /// PARALLEL { UNSAFE | RESTRICTED | SAFE } + pub parallel: Option, /// AS 'definition' /// /// Note that Hive's `AS class_name` is also parsed here. 
@@ -5731,6 +5775,12 @@ impl fmt::Display for CreateFunctionBody { if let Some(behavior) = &self.behavior { write!(f, " {behavior}")?; } + if let Some(called_on_null) = &self.called_on_null { + write!(f, " {called_on_null}")?; + } + if let Some(parallel) = &self.parallel { + write!(f, " {parallel}")?; + } if let Some(definition) = &self.as_ { write!(f, " AS {definition}")?; } diff --git a/src/keywords.rs b/src/keywords.rs index c94a6227c..fa7d133e3 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -353,6 +353,7 @@ define_keywords!( INITIALLY, INNER, INOUT, + INPUT, INPUTFORMAT, INSENSITIVE, INSERT, @@ -498,6 +499,7 @@ define_keywords!( OVERLAY, OVERWRITE, OWNED, + PARALLEL, PARAMETER, PARQUET, PARTITION, @@ -570,6 +572,7 @@ define_keywords!( RESPECT, RESTART, RESTRICT, + RESTRICTED, RESULT, RESULTSET, RETAIN, @@ -589,6 +592,7 @@ define_keywords!( ROW_NUMBER, RULE, RUN, + SAFE, SAFE_CAST, SAVEPOINT, SCHEMA, @@ -704,6 +708,7 @@ define_keywords!( UNLOGGED, UNNEST, UNPIVOT, + UNSAFE, UNSIGNED, UNTIL, UPDATE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a3d7a7cfc..235c1f1df 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3437,6 +3437,46 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::VOLATILE) { ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; body.behavior = Some(FunctionBehavior::Volatile); + } else if self.parse_keywords(&[ + Keyword::CALLED, + Keyword::ON, + Keyword::NULL, + Keyword::INPUT, + ]) { + ensure_not_set( + &body.called_on_null, + "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", + )?; + body.called_on_null = Some(FunctionCalledOnNull::CalledOnNullInput); + } else if self.parse_keywords(&[ + Keyword::RETURNS, + Keyword::NULL, + Keyword::ON, + Keyword::NULL, + Keyword::INPUT, + ]) { + ensure_not_set( + &body.called_on_null, + "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", + )?; + body.called_on_null = Some(FunctionCalledOnNull::ReturnsNullOnNullInput); + } else if self.parse_keyword(Keyword::STRICT) { + ensure_not_set( + &body.called_on_null, + "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", + )?; + body.called_on_null = Some(FunctionCalledOnNull::Strict); + } else if self.parse_keyword(Keyword::PARALLEL) { + ensure_not_set(&body.parallel, "PARALLEL { UNSAFE | RESTRICTED | SAFE }")?; + if self.parse_keyword(Keyword::UNSAFE) { + body.parallel = Some(FunctionParallel::Unsafe); + } else if self.parse_keyword(Keyword::RESTRICTED) { + body.parallel = Some(FunctionParallel::Restricted); + } else if self.parse_keyword(Keyword::SAFE) { + body.parallel = Some(FunctionParallel::Safe); + } else { + return self.expected("one of UNSAFE | RESTRICTED | SAFE", self.peek_token()); + } } else if self.parse_keyword(Keyword::RETURN) { ensure_not_set(&body.return_, "RETURN")?; body.return_ = Some(self.parse_expr()?); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 4a92cd45c..8515956f6 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3280,7 +3280,7 @@ fn parse_similar_to() { #[test] fn parse_create_function() { - let sql = "CREATE FUNCTION add(INTEGER, INTEGER) RETURNS INTEGER LANGUAGE SQL IMMUTABLE AS 'select $1 + $2;'"; + let sql = "CREATE FUNCTION add(INTEGER, INTEGER) RETURNS INTEGER LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE AS 'select $1 + $2;'"; assert_eq!( pg_and_generic().verified_stmt(sql), Statement::CreateFunction { @@ -3295,6 +3295,8 @@ fn parse_create_function() { params: CreateFunctionBody { language: Some("SQL".into()), behavior: 
Some(FunctionBehavior::Immutable), + called_on_null: Some(FunctionCalledOnNull::Strict), + parallel: Some(FunctionParallel::Safe), as_: Some(FunctionDefinition::SingleQuotedDef( "select $1 + $2;".into() )), @@ -3303,7 +3305,7 @@ fn parse_create_function() { } ); - let sql = "CREATE OR REPLACE FUNCTION add(a INTEGER, IN b INTEGER = 1) RETURNS INTEGER LANGUAGE SQL IMMUTABLE RETURN a + b"; + let sql = "CREATE OR REPLACE FUNCTION add(a INTEGER, IN b INTEGER = 1) RETURNS INTEGER LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT PARALLEL RESTRICTED RETURN a + b"; assert_eq!( pg_and_generic().verified_stmt(sql), Statement::CreateFunction { @@ -3323,6 +3325,40 @@ fn parse_create_function() { params: CreateFunctionBody { language: Some("SQL".into()), behavior: Some(FunctionBehavior::Immutable), + called_on_null: Some(FunctionCalledOnNull::ReturnsNullOnNullInput), + parallel: Some(FunctionParallel::Restricted), + return_: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier("a".into())), + op: BinaryOperator::Plus, + right: Box::new(Expr::Identifier("b".into())), + }), + ..Default::default() + }, + } + ); + + let sql = "CREATE OR REPLACE FUNCTION add(a INTEGER, IN b INTEGER = 1) RETURNS INTEGER LANGUAGE SQL STABLE CALLED ON NULL INPUT PARALLEL UNSAFE RETURN a + b"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::CreateFunction { + or_replace: true, + temporary: false, + name: ObjectName(vec![Ident::new("add")]), + args: Some(vec![ + OperateFunctionArg::with_name("a", DataType::Integer(None)), + OperateFunctionArg { + mode: Some(ArgMode::In), + name: Some("b".into()), + data_type: DataType::Integer(None), + default_expr: Some(Expr::Value(Value::Number("1".parse().unwrap(), false))), + } + ]), + return_type: Some(DataType::Integer(None)), + params: CreateFunctionBody { + language: Some("SQL".into()), + behavior: Some(FunctionBehavior::Stable), + called_on_null: Some(FunctionCalledOnNull::CalledOnNullInput), + parallel: Some(FunctionParallel::Unsafe), return_: Some(Expr::BinaryOp { left: Box::new(Expr::Identifier("a".into())), op: BinaryOperator::Plus, @@ -3348,6 +3384,8 @@ fn parse_create_function() { params: CreateFunctionBody { language: Some("plpgsql".into()), behavior: None, + called_on_null: None, + parallel: None, return_: None, as_: Some(FunctionDefinition::DoubleDollarDef( " BEGIN RETURN i + 1; END; ".into() @@ -3358,6 +3396,12 @@ fn parse_create_function() { ); } +#[test] +fn parse_incorrect_create_function_parallel() { + let sql = "CREATE FUNCTION add(INTEGER, INTEGER) RETURNS INTEGER LANGUAGE SQL PARALLEL BLAH AS 'select $1 + $2;'"; + assert!(pg().parse_sql_statements(sql).is_err()); +} + #[test] fn parse_drop_function() { let sql = "DROP FUNCTION IF EXISTS test_func"; From e976a2ee43e3310c19c2f55942e928b27f29fc55 Mon Sep 17 00:00:00 2001 From: sunxunle <163647374+sunxunle@users.noreply.github.com> Date: Sun, 7 Apr 2024 01:06:53 +0800 Subject: [PATCH 08/42] chore: fix some comments (#1184) Signed-off-by: sunxunle --- README.md | 2 +- src/ast/mod.rs | 2 +- src/dialect/mod.rs | 2 +- tests/sqlparser_sqlite.rs | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8a4b5d986..512f5f6c0 100644 --- a/README.md +++ b/README.md @@ -151,7 +151,7 @@ maintain this crate is limited. Please read the following sections carefully. 
### New Syntax The most commonly accepted PRs add support for or fix a bug in a feature in the -SQL standard, or a a popular RDBMS, such as Microsoft SQL +SQL standard, or a popular RDBMS, such as Microsoft SQL Server or PostgreSQL, will likely be accepted after a brief review. Any SQL feature that is dialect specific should be parsed by *both* the relevant [`Dialect`] as well as [`GenericDialect`]. diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9df0b5deb..9b3bf3f62 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4649,7 +4649,7 @@ pub struct Function { pub args: Vec, /// e.g. `x > 5` in `COUNT(x) FILTER (WHERE x > 5)` pub filter: Option>, - // Snowflake/MSSQL supports diffrent options for null treatment in rank functions + // Snowflake/MSSQL supports different options for null treatment in rank functions pub null_treatment: Option, pub over: Option, // aggregate functions may specify eg `COUNT(DISTINCT x)` diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 2873cca2c..0e7257b7b 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -93,7 +93,7 @@ macro_rules! dialect_of { pub trait Dialect: Debug + Any { /// Determine the [`TypeId`] of this dialect. /// - /// By default, return the same [`TypeId`] as [`Any::type_id`]. Can be overriden + /// By default, return the same [`TypeId`] as [`Any::type_id`]. Can be overridden /// by dialects that behave like other dialects /// (for example when wrapping a dialect). fn dialect(&self) -> TypeId { diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 3452355a8..0352b4ec6 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -55,7 +55,7 @@ fn pragma_eq_style() { } } #[test] -fn pragma_funciton_style() { +fn pragma_function_style() { let sql = "PRAGMA cache_size(10)"; match sqlite_and_generic().verified_stmt(sql) { Statement::Pragma { @@ -103,7 +103,7 @@ fn pragma_function_string_style() { } #[test] -fn pragma_eq_placehoder_style() { +fn pragma_eq_placeholder_style() { let sql = "PRAGMA table_info = ?"; match sqlite_and_generic().verified_stmt(sql) { Statement::Pragma { From 3bf40485c59c29391c3ee82b2b3e3801b449b9bf Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Sat, 6 Apr 2024 10:08:40 -0700 Subject: [PATCH 09/42] Fix parsing of equality binop in function argument (#1182) --- src/dialect/duckdb.rs | 4 ++++ src/dialect/mod.rs | 4 ++++ src/parser/mod.rs | 4 +++- src/test_utils.rs | 35 +++++++++++++++++++++-------------- tests/sqlparser_common.rs | 35 ++++++++++++++++++++++++++++++++--- 5 files changed, 64 insertions(+), 18 deletions(-) diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index a4f9309e6..f08545b99 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -33,4 +33,8 @@ impl Dialect for DuckDbDialect { fn supports_group_by_expr(&self) -> bool { true } + + fn supports_named_fn_args_with_eq_operator(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 0e7257b7b..2463121e7 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -143,6 +143,10 @@ pub trait Dialect: Debug + Any { fn supports_start_transaction_modifier(&self) -> bool { false } + /// Returns true if the dialect supports named arguments of the form FUN(a = '1', b = '2'). + fn supports_named_fn_args_with_eq_operator(&self) -> bool { + false + } /// Returns true if the dialect has a CONVERT function which accepts a type first /// and an expression second, e.g. 
`CONVERT(varchar, 1)` fn convert_type_before_value(&self) -> bool { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 235c1f1df..231de4d20 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8631,7 +8631,9 @@ impl<'a> Parser<'a> { arg, operator: FunctionArgOperator::RightArrow, }) - } else if self.peek_nth_token(1) == Token::Eq { + } else if self.dialect.supports_named_fn_args_with_eq_operator() + && self.peek_nth_token(1) == Token::Eq + { let name = self.parse_identifier(false)?; self.expect_token(&Token::Eq)?; diff --git a/src/test_utils.rs b/src/test_utils.rs index 4a54a6826..dd198f7dd 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -68,7 +68,7 @@ impl TestedDialects { } Some((dialect, parsed)) }) - .unwrap() + .expect("tested dialects cannot be empty") .1 } @@ -195,15 +195,6 @@ impl TestedDialects { /// Returns all available dialects. pub fn all_dialects() -> TestedDialects { - all_dialects_except(|_| false) -} - -/// Returns available dialects. The `except` predicate is used -/// to filter out specific dialects. -pub fn all_dialects_except(except: F) -> TestedDialects -where - F: Fn(&dyn Dialect) -> bool, -{ let all_dialects = vec![ Box::new(GenericDialect {}) as Box, Box::new(PostgreSqlDialect {}) as Box, @@ -218,14 +209,30 @@ where Box::new(DuckDbDialect {}) as Box, ]; TestedDialects { - dialects: all_dialects - .into_iter() - .filter(|d| !except(d.as_ref())) - .collect(), + dialects: all_dialects, options: None, } } +/// Returns all dialects matching the given predicate. +pub fn all_dialects_where(predicate: F) -> TestedDialects +where + F: Fn(&dyn Dialect) -> bool, +{ + let mut dialects = all_dialects(); + dialects.dialects.retain(|d| predicate(&**d)); + dialects +} + +/// Returns available dialects. The `except` predicate is used +/// to filter out specific dialects. +pub fn all_dialects_except(except: F) -> TestedDialects +where + F: Fn(&dyn Dialect) -> bool, +{ + all_dialects_where(|d| !except(d)) +} + pub fn assert_eq_vec(expected: &[&str], actual: &[T]) { assert_eq!( expected, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 62d5f2962..f78eda0cc 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -33,8 +33,8 @@ use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::{Parser, ParserError, ParserOptions}; use sqlparser::tokenizer::Tokenizer; use test_utils::{ - all_dialects, alter_table_op, assert_eq_vec, expr_from_projection, join, number, only, table, - table_alias, TestedDialects, + all_dialects, all_dialects_where, alter_table_op, assert_eq_vec, expr_from_projection, join, + number, only, table, table_alias, TestedDialects, }; #[macro_use] @@ -4045,7 +4045,9 @@ fn parse_named_argument_function() { #[test] fn parse_named_argument_function_with_eq_operator() { let sql = "SELECT FUN(a = '1', b = '2') FROM foo"; - let select = verified_only_select(sql); + + let select = all_dialects_where(|d| d.supports_named_fn_args_with_eq_operator()) + .verified_only_select(sql); assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), @@ -4074,6 +4076,33 @@ fn parse_named_argument_function_with_eq_operator() { }), expr_from_projection(only(&select.projection)) ); + + // Ensure that bar = 42 in a function argument parses as an equality binop + // rather than a named function argument. 
+ assert_eq!( + all_dialects_except(|d| d.supports_named_fn_args_with_eq_operator()) + .verified_expr("foo(bar = 42)"), + Expr::Function(Function { + name: ObjectName(vec![Ident::new("foo")]), + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("bar"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(number("42"))), + }, + ))], + filter: None, + null_treatment: None, + over: None, + distinct: false, + special: false, + order_by: vec![], + }) + ); + + // TODO: should this parse for all dialects? + all_dialects_except(|d| d.supports_named_fn_args_with_eq_operator()) + .verified_expr("iff(1 = 1, 1, 0)"); } #[test] From 05af4e049c1da491c9823d79d5fbc0dd4e9af36a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 7 Apr 2024 07:08:55 -0400 Subject: [PATCH 10/42] Cleanup CREATE FUNCTION tests (#1203) --- tests/sqlparser_postgres.rs | 102 ++++-------------------------------- 1 file changed, 9 insertions(+), 93 deletions(-) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 8515956f6..3747aef70 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -804,9 +804,7 @@ Kwara & Kogi PHP ₱ USD $ \N Some other value \\."#; - let ast = pg_and_generic().one_statement_parses_to(sql, ""); - println!("{ast:#?}"); - //assert_eq!(sql, ast.to_string()); + pg_and_generic().one_statement_parses_to(sql, ""); } #[test] @@ -3304,98 +3302,16 @@ fn parse_create_function() { }, } ); - - let sql = "CREATE OR REPLACE FUNCTION add(a INTEGER, IN b INTEGER = 1) RETURNS INTEGER LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT PARALLEL RESTRICTED RETURN a + b"; - assert_eq!( - pg_and_generic().verified_stmt(sql), - Statement::CreateFunction { - or_replace: true, - temporary: false, - name: ObjectName(vec![Ident::new("add")]), - args: Some(vec![ - OperateFunctionArg::with_name("a", DataType::Integer(None)), - OperateFunctionArg { - mode: Some(ArgMode::In), - name: Some("b".into()), - data_type: DataType::Integer(None), - default_expr: Some(Expr::Value(Value::Number("1".parse().unwrap(), false))), - } - ]), - return_type: Some(DataType::Integer(None)), - params: CreateFunctionBody { - language: Some("SQL".into()), - behavior: Some(FunctionBehavior::Immutable), - called_on_null: Some(FunctionCalledOnNull::ReturnsNullOnNullInput), - parallel: Some(FunctionParallel::Restricted), - return_: Some(Expr::BinaryOp { - left: Box::new(Expr::Identifier("a".into())), - op: BinaryOperator::Plus, - right: Box::new(Expr::Identifier("b".into())), - }), - ..Default::default() - }, - } - ); - - let sql = "CREATE OR REPLACE FUNCTION add(a INTEGER, IN b INTEGER = 1) RETURNS INTEGER LANGUAGE SQL STABLE CALLED ON NULL INPUT PARALLEL UNSAFE RETURN a + b"; - assert_eq!( - pg_and_generic().verified_stmt(sql), - Statement::CreateFunction { - or_replace: true, - temporary: false, - name: ObjectName(vec![Ident::new("add")]), - args: Some(vec![ - OperateFunctionArg::with_name("a", DataType::Integer(None)), - OperateFunctionArg { - mode: Some(ArgMode::In), - name: Some("b".into()), - data_type: DataType::Integer(None), - default_expr: Some(Expr::Value(Value::Number("1".parse().unwrap(), false))), - } - ]), - return_type: Some(DataType::Integer(None)), - params: CreateFunctionBody { - language: Some("SQL".into()), - behavior: Some(FunctionBehavior::Stable), - called_on_null: Some(FunctionCalledOnNull::CalledOnNullInput), - parallel: Some(FunctionParallel::Unsafe), - return_: Some(Expr::BinaryOp { - left: 
Box::new(Expr::Identifier("a".into())), - op: BinaryOperator::Plus, - right: Box::new(Expr::Identifier("b".into())), - }), - ..Default::default() - }, - } - ); - - let sql = r#"CREATE OR REPLACE FUNCTION increment(i INTEGER) RETURNS INTEGER LANGUAGE plpgsql AS $$ BEGIN RETURN i + 1; END; $$"#; - assert_eq!( - pg().verified_stmt(sql), - Statement::CreateFunction { - or_replace: true, - temporary: false, - name: ObjectName(vec![Ident::new("increment")]), - args: Some(vec![OperateFunctionArg::with_name( - "i", - DataType::Integer(None) - )]), - return_type: Some(DataType::Integer(None)), - params: CreateFunctionBody { - language: Some("plpgsql".into()), - behavior: None, - called_on_null: None, - parallel: None, - return_: None, - as_: Some(FunctionDefinition::DoubleDollarDef( - " BEGIN RETURN i + 1; END; ".into() - )), - using: None - }, - } - ); } +#[test] +fn parse_create_function_detailed() { + pg_and_generic().verified_stmt("CREATE OR REPLACE FUNCTION add(a INTEGER, IN b INTEGER = 1) RETURNS INTEGER LANGUAGE SQL IMMUTABLE PARALLEL RESTRICTED RETURN a + b"); + pg_and_generic().verified_stmt("CREATE OR REPLACE FUNCTION add(a INTEGER, IN b INTEGER = 1) RETURNS INTEGER LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT PARALLEL RESTRICTED RETURN a + b"); + pg_and_generic().verified_stmt("CREATE OR REPLACE FUNCTION add(a INTEGER, IN b INTEGER = 1) RETURNS INTEGER LANGUAGE SQL STABLE PARALLEL UNSAFE RETURN a + b"); + pg_and_generic().verified_stmt("CREATE OR REPLACE FUNCTION add(a INTEGER, IN b INTEGER = 1) RETURNS INTEGER LANGUAGE SQL STABLE CALLED ON NULL INPUT PARALLEL UNSAFE RETURN a + b"); + pg_and_generic().verified_stmt(r#"CREATE OR REPLACE FUNCTION increment(i INTEGER) RETURNS INTEGER LANGUAGE plpgsql AS $$ BEGIN RETURN i + 1; END; $$"#); +} #[test] fn parse_incorrect_create_function_parallel() { let sql = "CREATE FUNCTION add(INTEGER, INTEGER) RETURNS INTEGER LANGUAGE SQL PARALLEL BLAH AS 'select $1 + $2;'"; From 83c5d8191bef0b1dcdc4b8c9bae6657811d9df4c Mon Sep 17 00:00:00 2001 From: Nikita-str <42584606+Nikita-str@users.noreply.github.com> Date: Sun, 7 Apr 2024 15:12:48 +0300 Subject: [PATCH 11/42] solve `stack overflow` on `RecursionLimitExceeded` during debug building (#1171) Co-authored-by: Andrew Lamb --- src/parser/mod.rs | 97 +++++++++++++++++++++++++++++++---------------- 1 file changed, 64 insertions(+), 33 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 231de4d20..fcb3e3391 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -470,7 +470,7 @@ impl<'a> Parser<'a> { Keyword::ANALYZE => Ok(self.parse_analyze()?), Keyword::SELECT | Keyword::WITH | Keyword::VALUES => { self.prev_token(); - Ok(Statement::Query(Box::new(self.parse_query()?))) + Ok(Statement::Query(self.parse_boxed_query()?)) } Keyword::TRUNCATE => Ok(self.parse_truncate()?), Keyword::ATTACH => Ok(self.parse_attach_database()?), @@ -530,7 +530,7 @@ impl<'a> Parser<'a> { }, Token::LParen => { self.prev_token(); - Ok(Statement::Query(Box::new(self.parse_query()?))) + Ok(Statement::Query(self.parse_boxed_query()?)) } _ => self.expected("an SQL statement", next_token), } @@ -1084,7 +1084,7 @@ impl<'a> Parser<'a> { let expr = if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { self.prev_token(); - Expr::Subquery(Box::new(self.parse_query()?)) + Expr::Subquery(self.parse_boxed_query()?) 
} else { let exprs = self.parse_comma_separated(Parser::parse_expr)?; match exprs.len() { @@ -1465,7 +1465,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let exists_node = Expr::Exists { negated, - subquery: Box::new(self.parse_query()?), + subquery: self.parse_boxed_query()?, }; self.expect_token(&Token::RParen)?; Ok(exists_node) @@ -1654,9 +1654,9 @@ impl<'a> Parser<'a> { // Parses an array constructed from a subquery pub fn parse_array_subquery(&mut self) -> Result { - let query = self.parse_query()?; + let query = self.parse_boxed_query()?; self.expect_token(&Token::RParen)?; - Ok(Expr::ArraySubquery(Box::new(query))) + Ok(Expr::ArraySubquery(query)) } /// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`. @@ -2554,7 +2554,7 @@ impl<'a> Parser<'a> { self.prev_token(); Expr::InSubquery { expr: Box::new(expr), - subquery: Box::new(self.parse_query()?), + subquery: self.parse_boxed_query()?, negated, } } else { @@ -3637,7 +3637,7 @@ impl<'a> Parser<'a> { } self.expect_keyword(Keyword::AS)?; - let query = Box::new(self.parse_query()?); + let query = self.parse_boxed_query()?; // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. let with_no_schema_binding = dialect_of!(self is RedshiftSqlDialect | GenericDialect) @@ -4032,7 +4032,7 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::FOR)?; - let query = Some(Box::new(self.parse_query()?)); + let query = Some(self.parse_boxed_query()?); Ok(Statement::Declare { stmts: vec![Declare { @@ -4126,7 +4126,7 @@ impl<'a> Parser<'a> { match self.peek_token().token { Token::Word(w) if w.keyword == Keyword::SELECT => ( Some(DeclareType::Cursor), - Some(Box::new(self.parse_query()?)), + Some(self.parse_boxed_query()?), None, None, ), @@ -4650,7 +4650,7 @@ impl<'a> Parser<'a> { // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { - Some(Box::new(self.parse_query()?)) + Some(self.parse_boxed_query()?) } else { None }; @@ -5646,7 +5646,7 @@ impl<'a> Parser<'a> { let with_options = self.parse_options(Keyword::WITH)?; self.expect_keyword(Keyword::AS)?; - let query = Box::new(self.parse_query()?); + let query = self.parse_boxed_query()?; Ok(Statement::AlterView { name, @@ -5686,7 +5686,7 @@ impl<'a> Parser<'a> { pub fn parse_copy(&mut self) -> Result { let source; if self.consume_token(&Token::LParen) { - source = CopySource::Query(Box::new(self.parse_query()?)); + source = CopySource::Query(self.parse_boxed_query()?); self.expect_token(&Token::RParen)?; } else { let table_name = self.parse_object_name(false)?; @@ -6910,6 +6910,15 @@ impl<'a> Parser<'a> { } } + /// Call's [`Self::parse_query`] returning a `Box`'ed result. + /// + /// This function can be used to reduce the stack size required in debug + /// builds. Instead of `sizeof(Query)` only a pointer (`Box`) + /// is used. + fn parse_boxed_query(&mut self) -> Result, ParserError> { + self.parse_query().map(Box::new) + } + /// Parse a query expression, i.e. a `SELECT` statement optionally /// preceded with some `WITH` CTE declarations and optionally followed /// by `ORDER BY`. Unlike some other parse_... 
methods, this one doesn't @@ -6924,13 +6933,10 @@ impl<'a> Parser<'a> { } else { None }; - if self.parse_keyword(Keyword::INSERT) { - let insert = self.parse_insert()?; - Ok(Query { with, - body: Box::new(SetExpr::Insert(insert)), + body: self.parse_insert_setexpr_boxed()?, limit: None, limit_by: vec![], order_by: vec![], @@ -6940,10 +6946,9 @@ impl<'a> Parser<'a> { for_clause: None, }) } else if self.parse_keyword(Keyword::UPDATE) { - let update = self.parse_update()?; Ok(Query { with, - body: Box::new(SetExpr::Update(update)), + body: self.parse_update_setexpr_boxed()?, limit: None, limit_by: vec![], order_by: vec![], @@ -6953,7 +6958,7 @@ impl<'a> Parser<'a> { for_clause: None, }) } else { - let body = Box::new(self.parse_query_body(0)?); + let body = self.parse_boxed_query_body(0)?; let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { self.parse_comma_separated(Parser::parse_order_by_expr)? @@ -7143,7 +7148,7 @@ impl<'a> Parser<'a> { } } self.expect_token(&Token::LParen)?; - let query = Box::new(self.parse_query()?); + let query = self.parse_boxed_query()?; self.expect_token(&Token::RParen)?; let alias = TableAlias { name, @@ -7167,7 +7172,7 @@ impl<'a> Parser<'a> { } } self.expect_token(&Token::LParen)?; - let query = Box::new(self.parse_query()?); + let query = self.parse_boxed_query()?; self.expect_token(&Token::RParen)?; let alias = TableAlias { name, columns }; Cte { @@ -7183,6 +7188,15 @@ impl<'a> Parser<'a> { Ok(cte) } + /// Call's [`Self::parse_query_body`] returning a `Box`'ed result. + /// + /// This function can be used to reduce the stack size required in debug + /// builds. Instead of `sizeof(QueryBody)` only a pointer (`Box`) + /// is used. + fn parse_boxed_query_body(&mut self, precedence: u8) -> Result, ParserError> { + self.parse_query_body(precedence).map(Box::new) + } + /// Parse a "query body", which is an expression with roughly the /// following grammar: /// ```sql @@ -7191,16 +7205,19 @@ impl<'a> Parser<'a> { /// subquery ::= query_body [ order_by_limit ] /// set_operation ::= query_body { 'UNION' | 'EXCEPT' | 'INTERSECT' } [ 'ALL' ] query_body /// ``` + /// + /// If you need `Box` then maybe there is sense to use `parse_boxed_query_body` + /// due to prevent stack overflow in debug building(to reserve less memory on stack). pub fn parse_query_body(&mut self, precedence: u8) -> Result { // We parse the expression using a Pratt parser, as in `parse_expr()`. // Start by parsing a restricted SELECT or a `(subquery)`: let mut expr = if self.parse_keyword(Keyword::SELECT) { - SetExpr::Select(Box::new(self.parse_select()?)) + SetExpr::Select(self.parse_select().map(Box::new)?) } else if self.consume_token(&Token::LParen) { // CTEs are not allowed here, but the parser currently accepts them - let subquery = self.parse_query()?; + let subquery = self.parse_boxed_query()?; self.expect_token(&Token::RParen)?; - SetExpr::Query(Box::new(subquery)) + SetExpr::Query(subquery) } else if self.parse_keyword(Keyword::VALUES) { let is_mysql = dialect_of!(self is MySqlDialect); SetExpr::Values(self.parse_values(is_mysql)?) 
@@ -7233,7 +7250,7 @@ impl<'a> Parser<'a> { left: Box::new(expr), op: op.unwrap(), set_quantifier, - right: Box::new(self.parse_query_body(next_precedence)?), + right: self.parse_boxed_query_body(next_precedence)?, }; } @@ -8147,7 +8164,7 @@ impl<'a> Parser<'a> { &mut self, lateral: IsLateral, ) -> Result { - let subquery = Box::new(self.parse_query()?); + let subquery = self.parse_boxed_query()?; self.expect_token(&Token::RParen)?; let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; Ok(TableFactor::Derived { @@ -8395,6 +8412,13 @@ impl<'a> Parser<'a> { Ok(insert.clone()) } + /// Parse an INSERT statement, returning a `Box`ed SetExpr + /// + /// This is used to reduce the size of the stack frames in debug builds + fn parse_insert_setexpr_boxed(&mut self) -> Result, ParserError> { + Ok(Box::new(SetExpr::Insert(self.parse_insert()?))) + } + /// Parse an INSERT statement pub fn parse_insert(&mut self) -> Result { let or = if !dialect_of!(self is SQLiteDialect) { @@ -8445,7 +8469,7 @@ impl<'a> Parser<'a> { } else { None }; - let source = Box::new(self.parse_query()?); + let source = self.parse_boxed_query()?; Ok(Statement::Directory { local, path, @@ -8478,7 +8502,7 @@ impl<'a> Parser<'a> { // Hive allows you to specify columns after partitions as well if you want. let after_columns = self.parse_parenthesized_column_list(Optional, false)?; - let source = Some(Box::new(self.parse_query()?)); + let source = Some(self.parse_boxed_query()?); (columns, partitioned, after_columns, source) }; @@ -8581,6 +8605,13 @@ impl<'a> Parser<'a> { } } + /// Parse an UPDATE statement, returning a `Box`ed SetExpr + /// + /// This is used to reduce the size of the stack frames in debug builds + fn parse_update_setexpr_boxed(&mut self) -> Result, ParserError> { + Ok(Box::new(SetExpr::Update(self.parse_update()?))) + } + pub fn parse_update(&mut self) -> Result { let table = self.parse_table_and_joins()?; self.expect_keyword(Keyword::SET)?; @@ -8686,11 +8717,11 @@ impl<'a> Parser<'a> { .is_some() { self.prev_token(); - let subquery = self.parse_query()?; + let subquery = self.parse_boxed_query()?; self.expect_token(&Token::RParen)?; return Ok(( vec![FunctionArg::Unnamed(FunctionArgExpr::from(Expr::Subquery( - Box::new(subquery), + subquery, )))], vec![], )); @@ -9204,7 +9235,7 @@ impl<'a> Parser<'a> { pub fn parse_unload(&mut self) -> Result { self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; + let query = self.parse_boxed_query()?; self.expect_token(&Token::RParen)?; self.expect_keyword(Keyword::TO)?; @@ -9213,7 +9244,7 @@ impl<'a> Parser<'a> { let with_options = self.parse_options(Keyword::WITH)?; Ok(Statement::Unload { - query: Box::new(query), + query, to, with: with_options, }) From 23103302e62b13351ecc33703bfdcef609166f41 Mon Sep 17 00:00:00 2001 From: Nikita-str <42584606+Nikita-str@users.noreply.github.com> Date: Sun, 7 Apr 2024 15:20:21 +0300 Subject: [PATCH 12/42] Support named windows in `OVER (window_definition)` clause (#1166) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 15 ++++++++- src/parser/mod.rs | 9 +++++ tests/sqlparser_common.rs | 69 ++++++++++++++++++++++++++++++++++++++- tests/sqlparser_sqlite.rs | 1 + 4 files changed, 92 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9b3bf3f62..dfdc86e06 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1246,11 +1246,19 @@ impl Display for WindowType { } } -/// A window specification (i.e. `OVER (PARTITION BY .. ORDER BY .. etc.)`) +/// A window specification (i.e. 
`OVER ([window_name] PARTITION BY .. ORDER BY .. etc.)`) #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct WindowSpec { + /// Optional window name. + /// + /// You can find it at least in [MySQL][1], [BigQuery][2], [PostgreSQL][3] + /// + /// [1]: https://dev.mysql.com/doc/refman/8.0/en/window-functions-named-windows.html + /// [2]: https://cloud.google.com/bigquery/docs/reference/standard-sql/window-function-calls + /// [3]: https://www.postgresql.org/docs/current/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS + pub window_name: Option, /// `OVER (PARTITION BY ...)` pub partition_by: Vec, /// `OVER (ORDER BY ...)` @@ -1262,7 +1270,12 @@ pub struct WindowSpec { impl fmt::Display for WindowSpec { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut delim = ""; + if let Some(window_name) = &self.window_name { + delim = " "; + write!(f, "{window_name}")?; + } if !self.partition_by.is_empty() { + f.write_str(delim)?; delim = " "; write!( f, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fcb3e3391..568d89e36 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9528,6 +9528,13 @@ impl<'a> Parser<'a> { } pub fn parse_window_spec(&mut self) -> Result { + let window_name = match self.peek_token().token { + Token::Word(word) if word.keyword == Keyword::NoKeyword => { + self.maybe_parse(|parser| parser.parse_identifier(false)) + } + _ => None, + }; + let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { self.parse_comma_separated(Parser::parse_expr)? } else { @@ -9538,6 +9545,7 @@ impl<'a> Parser<'a> { } else { vec![] }; + let window_frame = if !self.consume_token(&Token::RParen) { let window_frame = self.parse_window_frame()?; self.expect_token(&Token::RParen)?; @@ -9546,6 +9554,7 @@ impl<'a> Parser<'a> { None }; Ok(WindowSpec { + window_name, partition_by, order_by, window_frame, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f78eda0cc..6e33dce9b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2062,6 +2062,7 @@ fn parse_select_qualify() { null_treatment: None, filter: None, over: Some(WindowType::WindowSpec(WindowSpec { + window_name: None, partition_by: vec![Expr::Identifier(Ident::new("p"))], order_by: vec![OrderByExpr { expr: Expr::Identifier(Ident::new("o")), @@ -4122,7 +4123,10 @@ fn parse_window_functions() { GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING) \ FROM foo"; let select = verified_only_select(sql); - assert_eq!(7, select.projection.len()); + + const EXPECTED_PROJ_QTY: usize = 7; + assert_eq!(EXPECTED_PROJ_QTY, select.projection.len()); + assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("row_number")]), @@ -4130,6 +4134,7 @@ fn parse_window_functions() { null_treatment: None, filter: None, over: Some(WindowType::WindowSpec(WindowSpec { + window_name: None, partition_by: vec![], order_by: vec![OrderByExpr { expr: Expr::Identifier(Ident::new("dt")), @@ -4144,6 +4149,66 @@ fn parse_window_functions() { }), expr_from_projection(&select.projection[0]) ); + + for i in 0..EXPECTED_PROJ_QTY { + assert!(matches!( + expr_from_projection(&select.projection[i]), + Expr::Function(Function { + over: Some(WindowType::WindowSpec(WindowSpec { + window_name: None, + .. + })), + .. 
+ }) + )); + } +} + +#[test] +fn parse_named_window_functions() { + let supported_dialects = TestedDialects { + dialects: vec![ + Box::new(GenericDialect {}), + Box::new(PostgreSqlDialect {}), + Box::new(MySqlDialect {}), + Box::new(BigQueryDialect {}), + ], + options: None, + }; + + let sql = "SELECT row_number() OVER (w ORDER BY dt DESC), \ + sum(foo) OVER (win PARTITION BY a, b ORDER BY c, d \ + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) \ + FROM foo \ + WINDOW w AS (PARTITION BY x), win AS (ORDER BY y)"; + supported_dialects.verified_stmt(sql); + + let select = verified_only_select(sql); + + const EXPECTED_PROJ_QTY: usize = 2; + assert_eq!(EXPECTED_PROJ_QTY, select.projection.len()); + + const EXPECTED_WIN_NAMES: [&str; 2] = ["w", "win"]; + for (i, win_name) in EXPECTED_WIN_NAMES.iter().enumerate() { + assert!(matches!( + expr_from_projection(&select.projection[i]), + Expr::Function(Function { + over: Some(WindowType::WindowSpec(WindowSpec { + window_name: Some(Ident { value, .. }), + .. + })), + .. + }) if value == win_name + )); + } + + let sql = "SELECT \ + FIRST_VALUE(x) OVER (w ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS first, \ + FIRST_VALUE(x) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS last, \ + SUM(y) OVER (win PARTITION BY x) AS last \ + FROM EMPLOYEE \ + WINDOW w AS (PARTITION BY x), win AS (w ORDER BY y)"; + supported_dialects.verified_stmt(sql); } #[test] @@ -4244,6 +4309,7 @@ fn test_parse_named_window() { quote_style: None, }, WindowSpec { + window_name: None, partition_by: vec![], order_by: vec![OrderByExpr { expr: Expr::Identifier(Ident { @@ -4262,6 +4328,7 @@ fn test_parse_named_window() { quote_style: None, }, WindowSpec { + window_name: None, partition_by: vec![Expr::Identifier(Ident { value: "C11".to_string(), quote_style: None, diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 0352b4ec6..c9d5d98cd 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -446,6 +446,7 @@ fn parse_window_function_with_filter() { ))], null_treatment: None, over: Some(WindowType::WindowSpec(WindowSpec { + window_name: None, partition_by: vec![], order_by: vec![], window_frame: None, From 732e1ec1fc1abe23ead675a04f70dc0a2806ef97 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Sun, 7 Apr 2024 14:31:04 +0200 Subject: [PATCH 13/42] BigQuery support inline comment with hash syntax (#1192) --- src/tokenizer.rs | 2 +- tests/sqlparser_common.rs | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index e31fccca9..1ceec705b 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -984,7 +984,7 @@ impl<'a> Tokenizer<'a> { } '{' => self.consume_and_return(chars, Token::LBrace), '}' => self.consume_and_return(chars, Token::RBrace), - '#' if dialect_of!(self is SnowflakeDialect) => { + '#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect) => { chars.next(); // consume the '#', starting a snowflake single-line comment let comment = self.tokenize_single_line_comment(chars); Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 6e33dce9b..f474e1166 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8607,6 +8607,24 @@ fn test_release_savepoint() { one_statement_parses_to("RELEASE test1", "RELEASE SAVEPOINT test1"); } +#[test] +fn test_comment_hash_syntax() { + let dialects = TestedDialects { + dialects: vec![Box::new(BigQueryDialect {}), 
Box::new(SnowflakeDialect {})], + options: None, + }; + let sql = r#" + # comment + SELECT a, b, c # , d, e + FROM T + ####### comment ################# + WHERE true + # comment + "#; + let canonical = "SELECT a, b, c FROM T WHERE true"; + dialects.verified_only_select_with_canonical(sql, canonical); +} + #[test] fn test_buffer_reuse() { let d = GenericDialect {}; From 20c57547847b353a797dc830b1449a3d6b9135ad Mon Sep 17 00:00:00 2001 From: xring Date: Sun, 7 Apr 2024 20:43:23 +0800 Subject: [PATCH 14/42] Support `[FIRST | AFTER column_name]` support in `ALTER TABLE` for MySQL (#1180) --- src/ast/ddl.rs | 25 +++-- src/ast/mod.rs | 22 +++++ src/keywords.rs | 1 + src/parser/mod.rs | 22 +++++ tests/sqlparser_common.rs | 2 + tests/sqlparser_mysql.rs | 189 ++++++++++++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 2 + 7 files changed, 257 insertions(+), 6 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 9e3137d94..080e8c4da 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -25,8 +25,8 @@ use sqlparser_derive::{Visit, VisitMut}; use crate::ast::value::escape_single_quote_string; use crate::ast::{ - display_comma_separated, display_separated, DataType, Expr, Ident, ObjectName, SequenceOptions, - SqlOption, + display_comma_separated, display_separated, DataType, Expr, Ident, MySQLColumnPosition, + ObjectName, SequenceOptions, SqlOption, }; use crate::tokenizer::Token; @@ -45,6 +45,8 @@ pub enum AlterTableOperation { if_not_exists: bool, /// . column_def: ColumnDef, + /// MySQL `ALTER TABLE` only [FIRST | AFTER column_name] + column_position: Option, }, /// `DISABLE ROW LEVEL SECURITY` /// @@ -129,6 +131,8 @@ pub enum AlterTableOperation { new_name: Ident, data_type: DataType, options: Vec, + /// MySQL `ALTER TABLE` only [FIRST | AFTER column_name] + column_position: Option, }, /// `RENAME CONSTRAINT TO ` /// @@ -171,6 +175,7 @@ impl fmt::Display for AlterTableOperation { column_keyword, if_not_exists, column_def, + column_position, } => { write!(f, "ADD")?; if *column_keyword { @@ -181,6 +186,10 @@ impl fmt::Display for AlterTableOperation { } write!(f, " {column_def}")?; + if let Some(position) = column_position { + write!(f, " {position}")?; + } + Ok(()) } AlterTableOperation::AlterColumn { column_name, op } => { @@ -271,13 +280,17 @@ impl fmt::Display for AlterTableOperation { new_name, data_type, options, + column_position, } => { write!(f, "CHANGE COLUMN {old_name} {new_name} {data_type}")?; - if options.is_empty() { - Ok(()) - } else { - write!(f, " {}", display_separated(options, " ")) + if !options.is_empty() { + write!(f, " {}", display_separated(options, " "))?; } + if let Some(position) = column_position { + write!(f, " {position}")?; + } + + Ok(()) } AlterTableOperation::RenameConstraint { old_name, new_name } => { write!(f, "RENAME CONSTRAINT {old_name} TO {new_name}") diff --git a/src/ast/mod.rs b/src/ast/mod.rs index dfdc86e06..2eebbc604 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -6018,6 +6018,28 @@ impl fmt::Display for HiveSetLocation { } } +/// MySQL `ALTER TABLE` only [FIRST | AFTER column_name] +#[allow(clippy::large_enum_variant)] +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MySQLColumnPosition { + First, + After(Ident), +} + +impl Display for MySQLColumnPosition { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MySQLColumnPosition::First => Ok(write!(f, "FIRST")?), + 
MySQLColumnPosition::After(ident) => { + let column_name = &ident.value; + Ok(write!(f, "AFTER {column_name}")?) + } + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/keywords.rs b/src/keywords.rs index fa7d133e3..91842672d 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -73,6 +73,7 @@ define_keywords!( ACTION, ADD, ADMIN, + AFTER, AGAINST, ALL, ALLOCATE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 568d89e36..57e24d218 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5358,10 +5358,14 @@ impl<'a> Parser<'a> { }; let column_def = self.parse_column_def()?; + + let column_position = self.parse_column_position()?; + AlterTableOperation::AddColumn { column_keyword, if_not_exists, column_def, + column_position, } } } @@ -5490,11 +5494,14 @@ impl<'a> Parser<'a> { options.push(option); } + let column_position = self.parse_column_position()?; + AlterTableOperation::ChangeColumn { old_name, new_name, data_type, options, + column_position, } } else if self.parse_keyword(Keyword::ALTER) { let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] @@ -9608,6 +9615,21 @@ impl<'a> Parser<'a> { Ok(partitions) } + fn parse_column_position(&mut self) -> Result, ParserError> { + if dialect_of!(self is MySqlDialect | GenericDialect) { + if self.parse_keyword(Keyword::FIRST) { + Ok(Some(MySQLColumnPosition::First)) + } else if self.parse_keyword(Keyword::AFTER) { + let ident = self.parse_identifier(false)?; + Ok(Some(MySQLColumnPosition::After(ident))) + } else { + Ok(None) + } + } else { + Ok(None) + } + } + /// Consume the parser and return its underlying token buffer pub fn into_tokens(self) -> Vec { self.tokens diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f474e1166..c8551e1fe 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3512,11 +3512,13 @@ fn parse_alter_table() { column_keyword, if_not_exists, column_def, + column_position, } => { assert!(column_keyword); assert!(!if_not_exists); assert_eq!("foo", column_def.name.to_string()); assert_eq!("TEXT", column_def.data_type.to_string()); + assert_eq!(None, column_position); } _ => unreachable!(), }; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 8ffb78ae2..59314c1d9 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1875,6 +1875,120 @@ fn parse_delete_with_limit() { } } +#[test] +fn parse_alter_table_add_column() { + match mysql().verified_stmt("ALTER TABLE tab ADD COLUMN b INT FIRST") { + Statement::AlterTable { + name, + if_exists, + only, + operations, + location: _, + } => { + assert_eq!(name.to_string(), "tab"); + assert!(!if_exists); + assert!(!only); + assert_eq!( + operations, + vec![AlterTableOperation::AddColumn { + column_keyword: true, + if_not_exists: false, + column_def: ColumnDef { + name: "b".into(), + data_type: DataType::Int(None), + collation: None, + options: vec![], + }, + column_position: Some(MySQLColumnPosition::First), + },] + ); + } + _ => unreachable!(), + } + + match mysql().verified_stmt("ALTER TABLE tab ADD COLUMN b INT AFTER foo") { + Statement::AlterTable { + name, + if_exists, + only, + operations, + location: _, + } => { + assert_eq!(name.to_string(), "tab"); + assert!(!if_exists); + assert!(!only); + assert_eq!( + operations, + vec![AlterTableOperation::AddColumn { + column_keyword: true, + if_not_exists: false, + column_def: ColumnDef { + name: "b".into(), + data_type: DataType::Int(None), + collation: None, + options: vec![], + }, + column_position: 
Some(MySQLColumnPosition::After(Ident { + value: String::from("foo"), + quote_style: None + })), + },] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_alter_table_add_columns() { + match mysql() + .verified_stmt("ALTER TABLE tab ADD COLUMN a TEXT FIRST, ADD COLUMN b INT AFTER foo") + { + Statement::AlterTable { + name, + if_exists, + only, + operations, + location: _, + } => { + assert_eq!(name.to_string(), "tab"); + assert!(!if_exists); + assert!(!only); + assert_eq!( + operations, + vec![ + AlterTableOperation::AddColumn { + column_keyword: true, + if_not_exists: false, + column_def: ColumnDef { + name: "a".into(), + data_type: DataType::Text, + collation: None, + options: vec![], + }, + column_position: Some(MySQLColumnPosition::First), + }, + AlterTableOperation::AddColumn { + column_keyword: true, + if_not_exists: false, + column_def: ColumnDef { + name: "b".into(), + data_type: DataType::Int(None), + collation: None, + options: vec![], + }, + column_position: Some(MySQLColumnPosition::After(Ident { + value: String::from("foo"), + quote_style: None, + })), + }, + ] + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_alter_table_drop_primary_key() { assert_matches!( @@ -1891,6 +2005,7 @@ fn parse_alter_table_change_column() { new_name: Ident::new("desc"), data_type: DataType::Text, options: vec![ColumnOption::NotNull], + column_position: None, }; let sql1 = "ALTER TABLE orders CHANGE COLUMN description desc TEXT NOT NULL"; @@ -1904,6 +2019,80 @@ fn parse_alter_table_change_column() { &expected_name.to_string(), ); assert_eq!(expected_operation, operation); + + let expected_operation = AlterTableOperation::ChangeColumn { + old_name: Ident::new("description"), + new_name: Ident::new("desc"), + data_type: DataType::Text, + options: vec![ColumnOption::NotNull], + column_position: Some(MySQLColumnPosition::First), + }; + let sql3 = "ALTER TABLE orders CHANGE COLUMN description desc TEXT NOT NULL FIRST"; + let operation = + alter_table_op_with_name(mysql().verified_stmt(sql3), &expected_name.to_string()); + assert_eq!(expected_operation, operation); + + let expected_operation = AlterTableOperation::ChangeColumn { + old_name: Ident::new("description"), + new_name: Ident::new("desc"), + data_type: DataType::Text, + options: vec![ColumnOption::NotNull], + column_position: Some(MySQLColumnPosition::After(Ident { + value: String::from("foo"), + quote_style: None, + })), + }; + let sql4 = "ALTER TABLE orders CHANGE COLUMN description desc TEXT NOT NULL AFTER foo"; + let operation = + alter_table_op_with_name(mysql().verified_stmt(sql4), &expected_name.to_string()); + assert_eq!(expected_operation, operation); +} + +#[test] +fn parse_alter_table_change_column_with_column_position() { + let expected_name = ObjectName(vec![Ident::new("orders")]); + let expected_operation_first = AlterTableOperation::ChangeColumn { + old_name: Ident::new("description"), + new_name: Ident::new("desc"), + data_type: DataType::Text, + options: vec![ColumnOption::NotNull], + column_position: Some(MySQLColumnPosition::First), + }; + + let sql1 = "ALTER TABLE orders CHANGE COLUMN description desc TEXT NOT NULL FIRST"; + let operation = + alter_table_op_with_name(mysql().verified_stmt(sql1), &expected_name.to_string()); + assert_eq!(expected_operation_first, operation); + + let sql2 = "ALTER TABLE orders CHANGE description desc TEXT NOT NULL FIRST"; + let operation = alter_table_op_with_name( + mysql().one_statement_parses_to(sql2, sql1), + &expected_name.to_string(), + ); + 
assert_eq!(expected_operation_first, operation); + + let expected_operation_after = AlterTableOperation::ChangeColumn { + old_name: Ident::new("description"), + new_name: Ident::new("desc"), + data_type: DataType::Text, + options: vec![ColumnOption::NotNull], + column_position: Some(MySQLColumnPosition::After(Ident { + value: String::from("total_count"), + quote_style: None, + })), + }; + + let sql1 = "ALTER TABLE orders CHANGE COLUMN description desc TEXT NOT NULL AFTER total_count"; + let operation = + alter_table_op_with_name(mysql().verified_stmt(sql1), &expected_name.to_string()); + assert_eq!(expected_operation_after, operation); + + let sql2 = "ALTER TABLE orders CHANGE description desc TEXT NOT NULL AFTER total_count"; + let operation = alter_table_op_with_name( + mysql().one_statement_parses_to(sql2, sql1), + &expected_name.to_string(), + ); + assert_eq!(expected_operation_after, operation); } #[test] diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 3747aef70..ea5c9875b 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -694,6 +694,7 @@ fn parse_alter_table_add_columns() { collation: None, options: vec![], }, + column_position: None, }, AlterTableOperation::AddColumn { column_keyword: true, @@ -704,6 +705,7 @@ fn parse_alter_table_add_columns() { collation: None, options: vec![], }, + column_position: None, }, ] ); From 17ef71e42b72df46ee133b9e242fa93881d05a66 Mon Sep 17 00:00:00 2001 From: Maciej Obuchowski Date: Sun, 7 Apr 2024 14:45:59 +0200 Subject: [PATCH 15/42] Fix parse `COPY INTO` stage names without parens for SnowFlake (#1187) Signed-off-by: Maciej Obuchowski --- src/dialect/snowflake.rs | 6 +++++- tests/sqlparser_snowflake.rs | 42 ++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 8ffaf5944..1d9d983e5 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -155,6 +155,10 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result { + parser.prev_token(); + break; + } Token::AtSign => ident.push('@'), Token::Tilde => ident.push('~'), Token::Mod => ident.push('%'), @@ -219,7 +223,7 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result { } _ => { parser.prev_token(); - from_stage = parser.parse_object_name(false)?; + from_stage = parse_snowflake_stage_name(parser)?; stage_params = parse_stage_params(parser)?; // as diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 65755f685..49b440506 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1337,6 +1337,48 @@ fn test_snowflake_stage_object_names() { } } +#[test] +fn test_snowflake_copy_into() { + let sql = "COPY INTO a.b FROM @namespace.stage_name"; + assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); + match snowflake().verified_stmt(sql) { + Statement::CopyIntoSnowflake { + into, from_stage, .. + } => { + assert_eq!(into, ObjectName(vec![Ident::new("a"), Ident::new("b")])); + assert_eq!( + from_stage, + ObjectName(vec![Ident::new("@namespace"), Ident::new("stage_name")]) + ) + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_copy_into_stage_name_ends_with_parens() { + let sql = "COPY INTO SCHEMA.SOME_MONITORING_SYSTEM FROM (SELECT t.$1:st AS st FROM @schema.general_finished)"; + assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); + match snowflake().verified_stmt(sql) { + Statement::CopyIntoSnowflake { + into, from_stage, .. 
+ } => { + assert_eq!( + into, + ObjectName(vec![ + Ident::new("SCHEMA"), + Ident::new("SOME_MONITORING_SYSTEM") + ]) + ); + assert_eq!( + from_stage, + ObjectName(vec![Ident::new("@schema"), Ident::new("general_finished")]) + ) + } + _ => unreachable!(), + } +} + #[test] fn test_snowflake_trim() { let real_sql = r#"SELECT customer_id, TRIM(sub_items.value:item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; From 6da8828c1b664a87f8ffe0cd68609aec046f436c Mon Sep 17 00:00:00 2001 From: yassun7010 <47286750+yassun7010@users.noreply.github.com> Date: Wed, 10 Apr 2024 05:19:27 +0900 Subject: [PATCH 16/42] feat: support tailing commas on snowflake dialect. (#1205) --- src/parser/mod.rs | 5 +++-- tests/sqlparser_snowflake.rs | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 57e24d218..48eaed92c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2946,14 +2946,15 @@ impl<'a> Parser<'a> { /// Parse a comma-separated list of 1+ SelectItem pub fn parse_projection(&mut self) -> Result, ParserError> { - // BigQuery allows trailing commas, but only in project lists + // BigQuery and Snowflake allow trailing commas, but only in project lists // e.g. `SELECT 1, 2, FROM t` // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#trailing_commas + // https://docs.snowflake.com/en/release-notes/2024/8_11#select-supports-trailing-commas // // This pattern could be captured better with RAII type semantics, but it's quite a bit of // code to add for just one case, so we'll just do it manually here. let old_value = self.options.trailing_commas; - self.options.trailing_commas |= dialect_of!(self is BigQueryDialect); + self.options.trailing_commas |= dialect_of!(self is BigQueryDialect | SnowflakeDialect); let ret = self.parse_comma_separated(|p| p.parse_select_item()); self.options.trailing_commas = old_value; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 49b440506..880129f82 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1536,3 +1536,8 @@ fn parse_comma_outer_join() { "SELECT t1.c1, t2.c2 FROM t1, t2 WHERE t1.c1 = t2.c2 (+)", ); } + +#[test] +fn test_sf_trailing_commas() { + snowflake().verified_only_select_with_canonical("SELECT 1, 2, FROM t", "SELECT 1, 2 FROM t"); +} From 8f67d1a713006e7189fafecc8b833ee919438248 Mon Sep 17 00:00:00 2001 From: Nikita-str <42584606+Nikita-str@users.noreply.github.com> Date: Tue, 9 Apr 2024 23:20:24 +0300 Subject: [PATCH 17/42] Support MySQL `UNIQUE` table constraint (#1164) Co-authored-by: Andrew Lamb --- src/ast/ddl.rs | 176 ++++++++++++++++++++++++++--- src/ast/mod.rs | 2 +- src/parser/mod.rs | 121 +++++++++++++++----- tests/sqlparser_mysql.rs | 236 ++++++++++++++++++++++++++++++--------- 4 files changed, 441 insertions(+), 94 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 080e8c4da..d86ebad9d 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -15,7 +15,7 @@ #[cfg(not(feature = "std"))] use alloc::{boxed::Box, string::String, vec::Vec}; -use core::fmt; +use core::fmt::{self, Write}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -397,12 +397,68 @@ impl fmt::Display for AlterColumnOperation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TableConstraint { - /// `[ CONSTRAINT ] { PRIMARY KEY | UNIQUE } ()` + /// MySQL [definition][1] for `UNIQUE` constraints statements:\ + /// * 
`[CONSTRAINT []] UNIQUE [] [index_type] () ` + /// + /// where: + /// * [index_type][2] is `USING {BTREE | HASH}` + /// * [index_options][3] is `{index_type | COMMENT 'string' | ... %currently unsupported stmts% } ...` + /// * [index_type_display][4] is `[INDEX | KEY]` + /// + /// [1]: https://dev.mysql.com/doc/refman/8.3/en/create-table.html + /// [2]: IndexType + /// [3]: IndexOption + /// [4]: KeyOrIndexDisplay Unique { + /// Constraint name. + /// + /// Can be not the same as `index_name` name: Option, + /// Index name + index_name: Option, + /// Whether the type is followed by the keyword `KEY`, `INDEX`, or no keyword at all. + index_type_display: KeyOrIndexDisplay, + /// Optional `USING` of [index type][1] statement before columns. + /// + /// [1]: IndexType + index_type: Option, + /// Identifiers of the columns that are unique. columns: Vec, - /// Whether this is a `PRIMARY KEY` or just a `UNIQUE` constraint - is_primary: bool, + index_options: Vec, + characteristics: Option, + }, + /// MySQL [definition][1] for `PRIMARY KEY` constraints statements:\ + /// * `[CONSTRAINT []] PRIMARY KEY [index_name] [index_type] () ` + /// + /// Actually the specification have no `[index_name]` but the next query will complete successfully: + /// ```sql + /// CREATE TABLE unspec_table ( + /// xid INT NOT NULL, + /// CONSTRAINT p_name PRIMARY KEY index_name USING BTREE (xid) + /// ); + /// ``` + /// + /// where: + /// * [index_type][2] is `USING {BTREE | HASH}` + /// * [index_options][3] is `{index_type | COMMENT 'string' | ... %currently unsupported stmts% } ...` + /// + /// [1]: https://dev.mysql.com/doc/refman/8.3/en/create-table.html + /// [2]: IndexType + /// [3]: IndexOption + PrimaryKey { + /// Constraint name. + /// + /// Can be not the same as `index_name` + name: Option, + /// Index name + index_name: Option, + /// Optional `USING` of [index type][1] statement before columns. + /// + /// [1]: IndexType + index_type: Option, + /// Identifiers of the columns that form the primary key. 
+ columns: Vec, + index_options: Vec, characteristics: Option, }, /// A referential integrity constraint (`[ CONSTRAINT ] FOREIGN KEY () @@ -472,22 +528,51 @@ impl fmt::Display for TableConstraint { match self { TableConstraint::Unique { name, + index_name, + index_type_display, + index_type, columns, - is_primary, + index_options, characteristics, } => { write!( f, - "{}{} ({})", + "{}UNIQUE{index_type_display:>}{}{} ({})", display_constraint_name(name), - if *is_primary { "PRIMARY KEY" } else { "UNIQUE" }, - display_comma_separated(columns) + display_option_spaced(index_name), + display_option(" USING ", "", index_type), + display_comma_separated(columns), )?; - if let Some(characteristics) = characteristics { - write!(f, " {}", characteristics)?; + if !index_options.is_empty() { + write!(f, " {}", display_separated(index_options, " "))?; + } + + write!(f, "{}", display_option_spaced(characteristics))?; + Ok(()) + } + TableConstraint::PrimaryKey { + name, + index_name, + index_type, + columns, + index_options, + characteristics, + } => { + write!( + f, + "{}PRIMARY KEY{}{} ({})", + display_constraint_name(name), + display_option_spaced(index_name), + display_option(" USING ", "", index_type), + display_comma_separated(columns), + )?; + + if !index_options.is_empty() { + write!(f, " {}", display_separated(index_options, " "))?; } + write!(f, "{}", display_option_spaced(characteristics))?; Ok(()) } TableConstraint::ForeignKey { @@ -550,9 +635,7 @@ impl fmt::Display for TableConstraint { write!(f, "SPATIAL")?; } - if !matches!(index_type_display, KeyOrIndexDisplay::None) { - write!(f, " {index_type_display}")?; - } + write!(f, "{index_type_display:>}")?; if let Some(name) = opt_index_name { write!(f, " {name}")?; @@ -585,8 +668,20 @@ pub enum KeyOrIndexDisplay { Index, } +impl KeyOrIndexDisplay { + pub fn is_none(self) -> bool { + matches!(self, Self::None) + } +} + impl fmt::Display for KeyOrIndexDisplay { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let left_space = matches!(f.align(), Some(fmt::Alignment::Right)); + + if left_space && !self.is_none() { + f.write_char(' ')? + } + match self { KeyOrIndexDisplay::None => { write!(f, "") @@ -626,6 +721,30 @@ impl fmt::Display for IndexType { } } } + +/// MySQLs index option. +/// +/// This structure used here [`MySQL` CREATE TABLE][1], [`MySQL` CREATE INDEX][2]. 
+/// +/// [1]: https://dev.mysql.com/doc/refman/8.3/en/create-table.html +/// [2]: https://dev.mysql.com/doc/refman/8.3/en/create-index.html +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum IndexOption { + Using(IndexType), + Comment(String), +} + +impl fmt::Display for IndexOption { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Using(index_type) => write!(f, "USING {index_type}"), + Self::Comment(s) => write!(f, "COMMENT '{s}'"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -909,6 +1028,7 @@ pub enum GeneratedExpressionMode { Stored, } +#[must_use] fn display_constraint_name(name: &'_ Option) -> impl fmt::Display + '_ { struct ConstraintName<'a>(&'a Option); impl<'a> fmt::Display for ConstraintName<'a> { @@ -922,6 +1042,36 @@ fn display_constraint_name(name: &'_ Option) -> impl fmt::Display + '_ { ConstraintName(name) } +/// If `option` is +/// * `Some(inner)` => create display struct for `"{prefix}{inner}{postfix}"` +/// * `_` => do nothing +#[must_use] +fn display_option<'a, T: fmt::Display>( + prefix: &'a str, + postfix: &'a str, + option: &'a Option, +) -> impl fmt::Display + 'a { + struct OptionDisplay<'a, T>(&'a str, &'a str, &'a Option); + impl<'a, T: fmt::Display> fmt::Display for OptionDisplay<'a, T> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(inner) = self.2 { + let (prefix, postfix) = (self.0, self.1); + write!(f, "{prefix}{inner}{postfix}")?; + } + Ok(()) + } + } + OptionDisplay(prefix, postfix, option) +} + +/// If `option` is +/// * `Some(inner)` => create display struct for `" {inner}"` +/// * `_` => do nothing +#[must_use] +fn display_option_spaced(option: &Option) -> impl fmt::Display + '_ { + display_option(" ", "", option) +} + /// ` = [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ] [ ENFORCED | NOT ENFORCED ]` /// /// Used in UNIQUE and foreign key constraints. The individual settings may occur in any order. 
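[Editor's aside before the remaining hunks of this patch: a short usage sketch of the extended MySQL `UNIQUE` constraint grammar documented above (`[CONSTRAINT [name]] UNIQUE [KEY | INDEX] [index_name] [USING {BTREE | HASH}] (columns) [index_options]`). It assumes the sqlparser crate with this patch applied; the table, constraint, and index names are made up for illustration.]

```rust
use sqlparser::dialect::MySqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // Index type display (KEY), an index name, USING <index_type> and an
    // index option (COMMENT) may all follow UNIQUE on the MySQL dialect.
    let sql =
        "CREATE TABLE t (id INT, CONSTRAINT uq UNIQUE KEY idx USING BTREE (id) COMMENT 'dedup')";
    let statements = Parser::parse_sql(&MySqlDialect {}, sql)
        .expect("the MySQL dialect should accept the extended UNIQUE syntax");
    // Round-tripping through Display shows the canonical form kept in the AST.
    println!("{}", statements[0]);
}
```

[The analogous `PRIMARY KEY` form with an optional index name, `USING`, and index options is exercised by the tests later in this patch.]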
diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2eebbc604..a469338d6 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -33,7 +33,7 @@ pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue} pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, ConstraintCharacteristics, DeferrableInitial, GeneratedAs, - GeneratedExpressionMode, IndexType, KeyOrIndexDisplay, Partition, ProcedureParam, + GeneratedExpressionMode, IndexOption, IndexType, KeyOrIndexDisplay, Partition, ProcedureParam, ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewColumnDef, }; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 48eaed92c..7bd3ffb21 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5149,23 +5149,49 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { - Token::Word(w) if w.keyword == Keyword::PRIMARY || w.keyword == Keyword::UNIQUE => { - let is_primary = w.keyword == Keyword::PRIMARY; - - // parse optional [KEY] - let _ = self.parse_keyword(Keyword::KEY); + Token::Word(w) if w.keyword == Keyword::UNIQUE => { + let index_type_display = self.parse_index_type_display(); + if !dialect_of!(self is GenericDialect | MySqlDialect) + && !index_type_display.is_none() + { + return self + .expected("`index_name` or `(column_name [, ...])`", self.peek_token()); + } - // optional constraint name - let name = self - .maybe_parse(|parser| parser.parse_identifier(false)) - .or(name); + // optional index name + let index_name = self.parse_optional_indent(); + let index_type = self.parse_optional_using_then_index_type()?; let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let index_options = self.parse_index_options()?; let characteristics = self.parse_constraint_characteristics()?; Ok(Some(TableConstraint::Unique { name, + index_name, + index_type_display, + index_type, columns, - is_primary, + index_options, + characteristics, + })) + } + Token::Word(w) if w.keyword == Keyword::PRIMARY => { + // after `PRIMARY` always stay `KEY` + self.expect_keyword(Keyword::KEY)?; + + // optional index name + let index_name = self.parse_optional_indent(); + let index_type = self.parse_optional_using_then_index_type()?; + + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let index_options = self.parse_index_options()?; + let characteristics = self.parse_constraint_characteristics()?; + Ok(Some(TableConstraint::PrimaryKey { + name, + index_name, + index_type, + columns, + index_options, characteristics, })) } @@ -5209,20 +5235,17 @@ impl<'a> Parser<'a> { } Token::Word(w) if (w.keyword == Keyword::INDEX || w.keyword == Keyword::KEY) - && dialect_of!(self is GenericDialect | MySqlDialect) => + && dialect_of!(self is GenericDialect | MySqlDialect) + && name.is_none() => { let display_as_key = w.keyword == Keyword::KEY; let name = match self.peek_token().token { Token::Word(word) if word.keyword == Keyword::USING => None, - _ => self.maybe_parse(|parser| parser.parse_identifier(false)), + _ => self.parse_optional_indent(), }; - let index_type = if self.parse_keyword(Keyword::USING) { - Some(self.parse_index_type()?) 
- } else { - None - }; + let index_type = self.parse_optional_using_then_index_type()?; let columns = self.parse_parenthesized_column_list(Mandatory, false)?; Ok(Some(TableConstraint::Index { @@ -5248,15 +5271,9 @@ impl<'a> Parser<'a> { let fulltext = w.keyword == Keyword::FULLTEXT; - let index_type_display = if self.parse_keyword(Keyword::KEY) { - KeyOrIndexDisplay::Key - } else if self.parse_keyword(Keyword::INDEX) { - KeyOrIndexDisplay::Index - } else { - KeyOrIndexDisplay::None - }; + let index_type_display = self.parse_index_type_display(); - let opt_index_name = self.maybe_parse(|parser| parser.parse_identifier(false)); + let opt_index_name = self.parse_optional_indent(); let columns = self.parse_parenthesized_column_list(Mandatory, false)?; @@ -5313,6 +5330,56 @@ impl<'a> Parser<'a> { } } + /// Parse [USING {BTREE | HASH}] + pub fn parse_optional_using_then_index_type( + &mut self, + ) -> Result, ParserError> { + if self.parse_keyword(Keyword::USING) { + Ok(Some(self.parse_index_type()?)) + } else { + Ok(None) + } + } + + /// Parse `[ident]`, mostly `ident` is name, like: + /// `window_name`, `index_name`, ... + pub fn parse_optional_indent(&mut self) -> Option { + self.maybe_parse(|parser| parser.parse_identifier(false)) + } + + #[must_use] + pub fn parse_index_type_display(&mut self) -> KeyOrIndexDisplay { + if self.parse_keyword(Keyword::KEY) { + KeyOrIndexDisplay::Key + } else if self.parse_keyword(Keyword::INDEX) { + KeyOrIndexDisplay::Index + } else { + KeyOrIndexDisplay::None + } + } + + pub fn parse_optional_index_option(&mut self) -> Result, ParserError> { + if let Some(index_type) = self.parse_optional_using_then_index_type()? { + Ok(Some(IndexOption::Using(index_type))) + } else if self.parse_keyword(Keyword::COMMENT) { + let s = self.parse_literal_string()?; + Ok(Some(IndexOption::Comment(s))) + } else { + Ok(None) + } + } + + pub fn parse_index_options(&mut self) -> Result, ParserError> { + let mut options = Vec::new(); + + loop { + match self.parse_optional_index_option()? 
{ + Some(index_option) => options.push(index_option), + None => return Ok(options), + } + } + } + pub fn parse_sql_option(&mut self) -> Result { let name = self.parse_identifier(false)?; self.expect_token(&Token::Eq)?; @@ -9537,9 +9604,7 @@ impl<'a> Parser<'a> { pub fn parse_window_spec(&mut self) -> Result { let window_name = match self.peek_token().token { - Token::Word(word) if word.keyword == Keyword::NoKeyword => { - self.maybe_parse(|parser| parser.parse_identifier(false)) - } + Token::Word(word) if word.keyword == Keyword::NoKeyword => self.parse_optional_indent(), _ => None, }; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 59314c1d9..5f64079a6 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -500,63 +500,186 @@ fn parse_create_table_auto_increment() { } } -#[test] -fn parse_create_table_unique_key() { - let sql = "CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, UNIQUE KEY bar_key (bar))"; - let canonical = "CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key UNIQUE (bar))"; - match mysql().one_statement_parses_to(sql, canonical) { - Statement::CreateTable { +/// if `unique_index_type_display` is `Some` create `TableConstraint::Unique` +/// otherwise create `TableConstraint::Primary` +fn table_constraint_unique_primary_ctor( + name: Option, + index_name: Option, + index_type: Option, + columns: Vec, + index_options: Vec, + characteristics: Option, + unique_index_type_display: Option, +) -> TableConstraint { + match unique_index_type_display { + Some(index_type_display) => TableConstraint::Unique { name, + index_name, + index_type_display, + index_type, columns, - constraints, - .. - } => { - assert_eq!(name.to_string(), "foo"); - assert_eq!( - vec![TableConstraint::Unique { - name: Some(Ident::new("bar_key")), - columns: vec![Ident::new("bar")], - is_primary: false, - characteristics: None, - }], - constraints - ); - assert_eq!( - vec![ - ColumnDef { - name: Ident::new("id"), - data_type: DataType::Int(None), - collation: None, - options: vec![ - ColumnOptionDef { - name: None, - option: ColumnOption::Unique { - is_primary: true, - characteristics: None + index_options, + characteristics, + }, + None => TableConstraint::PrimaryKey { + name, + index_name, + index_type, + columns, + index_options, + characteristics, + }, + } +} + +#[test] +fn parse_create_table_primary_and_unique_key() { + let sqls = ["UNIQUE KEY", "PRIMARY KEY"] + .map(|key_ty|format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))")); + + let index_type_display = [Some(KeyOrIndexDisplay::Key), None]; + + for (sql, index_type_display) in sqls.iter().zip(index_type_display) { + match mysql().one_statement_parses_to(sql, "") { + Statement::CreateTable { + name, + columns, + constraints, + .. 
+ } => { + assert_eq!(name.to_string(), "foo"); + + let expected_constraint = table_constraint_unique_primary_ctor( + Some(Ident::new("bar_key")), + None, + None, + vec![Ident::new("bar")], + vec![], + None, + index_type_display, + ); + assert_eq!(vec![expected_constraint], constraints); + + assert_eq!( + vec![ + ColumnDef { + name: Ident::new("id"), + data_type: DataType::Int(None), + collation: None, + options: vec![ + ColumnOptionDef { + name: None, + option: ColumnOption::Unique { + is_primary: true, + characteristics: None + }, }, - }, - ColumnOptionDef { + ColumnOptionDef { + name: None, + option: ColumnOption::DialectSpecific(vec![ + Token::make_keyword("AUTO_INCREMENT") + ]), + }, + ], + }, + ColumnDef { + name: Ident::new("bar"), + data_type: DataType::Int(None), + collation: None, + options: vec![ColumnOptionDef { name: None, - option: ColumnOption::DialectSpecific(vec![Token::make_keyword( - "AUTO_INCREMENT" - )]), - }, - ], - }, - ColumnDef { - name: Ident::new("bar"), - data_type: DataType::Int(None), - collation: None, - options: vec![ColumnOptionDef { - name: None, - option: ColumnOption::NotNull, - },], - }, - ], - columns - ); + option: ColumnOption::NotNull, + },], + }, + ], + columns + ); + } + _ => unreachable!(), } - _ => unreachable!(), + } +} + +#[test] +fn parse_create_table_primary_and_unique_key_with_index_options() { + let sqls = ["UNIQUE INDEX", "PRIMARY KEY"] + .map(|key_ty|format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')")); + + let index_type_display = [Some(KeyOrIndexDisplay::Index), None]; + + for (sql, index_type_display) in sqls.iter().zip(index_type_display) { + match mysql_and_generic().one_statement_parses_to(sql, "") { + Statement::CreateTable { + name, constraints, .. + } => { + assert_eq!(name.to_string(), "foo"); + + let expected_constraint = table_constraint_unique_primary_ctor( + Some(Ident::new("constr")), + Some(Ident::new("index_name")), + None, + vec![Ident::new("bar"), Ident::new("var")], + vec![ + IndexOption::Using(IndexType::Hash), + IndexOption::Comment("yes, ".into()), + IndexOption::Using(IndexType::BTree), + IndexOption::Comment("MySQL allows".into()), + ], + None, + index_type_display, + ); + assert_eq!(vec![expected_constraint], constraints); + } + _ => unreachable!(), + } + + mysql_and_generic().verified_stmt(sql); + } +} + +#[test] +fn parse_create_table_primary_and_unique_key_with_index_type() { + let sqls = ["UNIQUE", "PRIMARY KEY"].map(|key_ty| { + format!("CREATE TABLE foo (bar INT, {key_ty} index_name USING BTREE (bar) USING HASH)") + }); + + let index_type_display = [Some(KeyOrIndexDisplay::None), None]; + + for (sql, index_type_display) in sqls.iter().zip(index_type_display) { + match mysql_and_generic().one_statement_parses_to(sql, "") { + Statement::CreateTable { + name, constraints, .. 
+ } => { + assert_eq!(name.to_string(), "foo"); + + let expected_constraint = table_constraint_unique_primary_ctor( + None, + Some(Ident::new("index_name")), + Some(IndexType::BTree), + vec![Ident::new("bar")], + vec![IndexOption::Using(IndexType::Hash)], + None, + index_type_display, + ); + assert_eq!(vec![expected_constraint], constraints); + } + _ => unreachable!(), + } + mysql_and_generic().verified_stmt(sql); + } + + let sql = "CREATE TABLE foo (bar INT, UNIQUE INDEX index_name USING BTREE (bar) USING HASH)"; + mysql_and_generic().verified_stmt(sql); + let sql = "CREATE TABLE foo (bar INT, PRIMARY KEY index_name USING BTREE (bar) USING HASH)"; + mysql_and_generic().verified_stmt(sql); +} + +#[test] +fn parse_create_table_primary_and_unique_key_characteristic_test() { + let sqls = ["UNIQUE INDEX", "PRIMARY KEY"] + .map(|key_ty|format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)")); + for sql in &sqls { + mysql_and_generic().verified_stmt(sql); } } @@ -2333,6 +2456,15 @@ fn parse_create_table_with_index_definition() { ); } +#[test] +fn parse_create_table_unallow_constraint_then_index() { + let sql = "CREATE TABLE foo (bar INT, CONSTRAINT constr INDEX index (bar))"; + assert!(mysql_and_generic().parse_sql_statements(sql).is_err()); + + let sql = "CREATE TABLE foo (bar INT, INDEX index (bar))"; + assert!(mysql_and_generic().parse_sql_statements(sql).is_ok()); +} + #[test] fn parse_create_table_with_fulltext_definition() { mysql_and_generic().verified_stmt("CREATE TABLE tb (id INT, FULLTEXT (id))"); From 241da85d67b8dd401ee086c324c3194792715cf7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Apr 2024 16:21:08 -0400 Subject: [PATCH 18/42] Support `CREATE/DROP SECRET` for duckdb dialect (#1208) Co-authored-by: Jichao Sun <4977515+JichaoS@users.noreply.github.com> --- src/ast/mod.rs | 169 ++++++++++++++++++++++++++++++++++++ src/keywords.rs | 4 + src/parser/mod.rs | 174 +++++++++++++++++++++++++++++++++++++- tests/sqlparser_duckdb.rs | 141 ++++++++++++++++++++++++++++++ 4 files changed, 486 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a469338d6..8fc696baa 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2040,6 +2040,19 @@ pub enum Statement { authorization_owner: Option, }, /// ```sql + /// CREATE SECRET + /// ``` + /// See [duckdb](https://duckdb.org/docs/sql/statements/create_secret.html) + CreateSecret { + or_replace: bool, + temporary: Option, + if_not_exists: bool, + name: Option, + storage_specifier: Option, + secret_type: Ident, + options: Vec, + }, + /// ```sql /// ALTER TABLE /// ``` AlterTable { @@ -2088,6 +2101,31 @@ pub enum Statement { /// true if the syntax is 'ATTACH DATABASE', false if it's just 'ATTACH' database: bool, }, + /// (DuckDB-specific) + /// ```sql + /// ATTACH 'sqlite_file.db' AS sqlite_db (READ_ONLY, TYPE SQLITE); + /// ``` + /// See + AttachDuckDBDatabase { + if_not_exists: bool, + /// true if the syntax is 'ATTACH DATABASE', false if it's just 'ATTACH' + database: bool, + /// An expression that indicates the path to the database file + database_path: Ident, + database_alias: Option, + attach_options: Vec, + }, + /// (DuckDB-specific) + /// ```sql + /// DETACH db_alias; + /// ``` + /// See + DetachDuckDBDatabase { + if_exists: bool, + /// true if the syntax is 'DETACH DATABASE', false if it's just 'DETACH' + database: bool, + database_alias: Ident, + }, /// ```sql /// DROP [TABLE, VIEW, ...] 
/// ``` @@ -2121,6 +2159,15 @@ pub enum Statement { option: Option, }, /// ```sql + /// DROP SECRET + /// ``` + DropSecret { + if_exists: bool, + temporary: Option, + name: Ident, + storage_specifier: Option, + }, + /// ```sql /// DECLARE /// ``` /// Declare Cursor Variables @@ -2772,6 +2819,40 @@ impl fmt::Display for Statement { let keyword = if *database { "DATABASE " } else { "" }; write!(f, "ATTACH {keyword}{database_file_name} AS {schema_name}") } + Statement::AttachDuckDBDatabase { + if_not_exists, + database, + database_path, + database_alias, + attach_options, + } => { + write!( + f, + "ATTACH{database}{if_not_exists} {database_path}", + database = if *database { " DATABASE" } else { "" }, + if_not_exists = if *if_not_exists { " IF NOT EXISTS" } else { "" }, + )?; + if let Some(alias) = database_alias { + write!(f, " AS {alias}")?; + } + if !attach_options.is_empty() { + write!(f, " ({})", display_comma_separated(attach_options))?; + } + Ok(()) + } + Statement::DetachDuckDBDatabase { + if_exists, + database, + database_alias, + } => { + write!( + f, + "DETACH{database}{if_exists} {database_alias}", + database = if *database { " DATABASE" } else { "" }, + if_exists = if *if_exists { " IF EXISTS" } else { "" }, + )?; + Ok(()) + } Statement::Analyze { table_name, partitions, @@ -3556,6 +3637,41 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::CreateSecret { + or_replace, + temporary, + if_not_exists, + name, + storage_specifier, + secret_type, + options, + } => { + write!( + f, + "CREATE {or_replace}", + or_replace = if *or_replace { "OR REPLACE " } else { "" }, + )?; + if let Some(t) = temporary { + write!(f, "{}", if *t { "TEMPORARY " } else { "PERSISTENT " })?; + } + write!( + f, + "SECRET {if_not_exists}", + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + )?; + if let Some(n) = name { + write!(f, "{n} ")?; + }; + if let Some(s) = storage_specifier { + write!(f, "IN {s} ")?; + } + write!(f, "( TYPE {secret_type}",)?; + if !options.is_empty() { + write!(f, ", {o}", o = display_comma_separated(options))?; + } + write!(f, " )")?; + Ok(()) + } Statement::AlterTable { name, if_exists, @@ -3636,6 +3752,26 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::DropSecret { + if_exists, + temporary, + name, + storage_specifier, + } => { + write!(f, "DROP ")?; + if let Some(t) = temporary { + write!(f, "{}", if *t { "TEMPORARY " } else { "PERSISTENT " })?; + } + write!( + f, + "SECRET {if_exists}{name}", + if_exists = if *if_exists { "IF EXISTS " } else { "" }, + )?; + if let Some(s) = storage_specifier { + write!(f, " FROM {s}")?; + } + Ok(()) + } Statement::Discard { object_type } => { write!(f, "DISCARD {object_type}")?; Ok(()) @@ -5070,6 +5206,39 @@ impl fmt::Display for SqlOption { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct SecretOption { + pub key: Ident, + pub value: Ident, +} + +impl fmt::Display for SecretOption { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} {}", self.key, self.value) + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AttachDuckDBDatabaseOption { + ReadOnly(Option), + Type(Ident), +} + +impl fmt::Display for AttachDuckDBDatabaseOption { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result 
{ + match self { + AttachDuckDBDatabaseOption::ReadOnly(Some(true)) => write!(f, "READ_ONLY true"), + AttachDuckDBDatabaseOption::ReadOnly(Some(false)) => write!(f, "READ_ONLY false"), + AttachDuckDBDatabaseOption::ReadOnly(None) => write!(f, "READ_ONLY"), + AttachDuckDBDatabaseOption::Type(t) => write!(f, "TYPE {}", t), + } + } +} + #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/keywords.rs b/src/keywords.rs index 91842672d..12a376b2a 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -224,6 +224,7 @@ define_keywords!( DEREF, DESC, DESCRIBE, + DETACH, DETAIL, DETERMINISTIC, DIRECTORY, @@ -514,6 +515,7 @@ define_keywords!( PERCENTILE_DISC, PERCENT_RANK, PERIOD, + PERSISTENT, PIVOT, PLACING, PLANS, @@ -543,6 +545,7 @@ define_keywords!( RCFILE, READ, READS, + READ_ONLY, REAL, RECURSIVE, REF, @@ -601,6 +604,7 @@ define_keywords!( SCROLL, SEARCH, SECOND, + SECRET, SECURITY, SELECT, SEMI, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7bd3ffb21..6fce36844 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -473,7 +473,16 @@ impl<'a> Parser<'a> { Ok(Statement::Query(self.parse_boxed_query()?)) } Keyword::TRUNCATE => Ok(self.parse_truncate()?), - Keyword::ATTACH => Ok(self.parse_attach_database()?), + Keyword::ATTACH => { + if dialect_of!(self is DuckDbDialect) { + Ok(self.parse_attach_duckdb_database()?) + } else { + Ok(self.parse_attach_database()?) + } + } + Keyword::DETACH if dialect_of!(self is DuckDbDialect | GenericDialect) => { + Ok(self.parse_detach_duckdb_database()?) + } Keyword::MSCK => Ok(self.parse_msck()?), Keyword::CREATE => Ok(self.parse_create()?), Keyword::CACHE => Ok(self.parse_cache_table()?), @@ -666,6 +675,72 @@ impl<'a> Parser<'a> { }) } + pub fn parse_attach_duckdb_database_options( + &mut self, + ) -> Result, ParserError> { + if !self.consume_token(&Token::LParen) { + return Ok(vec![]); + } + + let mut options = vec![]; + loop { + if self.parse_keyword(Keyword::READ_ONLY) { + let boolean = if self.parse_keyword(Keyword::TRUE) { + Some(true) + } else if self.parse_keyword(Keyword::FALSE) { + Some(false) + } else { + None + }; + options.push(AttachDuckDBDatabaseOption::ReadOnly(boolean)); + } else if self.parse_keyword(Keyword::TYPE) { + let ident = self.parse_identifier(false)?; + options.push(AttachDuckDBDatabaseOption::Type(ident)); + } else { + return self.expected("expected one of: ), READ_ONLY, TYPE", self.peek_token()); + }; + + if self.consume_token(&Token::RParen) { + return Ok(options); + } else if self.consume_token(&Token::Comma) { + continue; + } else { + return self.expected("expected one of: ')', ','", self.peek_token()); + } + } + } + + pub fn parse_attach_duckdb_database(&mut self) -> Result { + let database = self.parse_keyword(Keyword::DATABASE); + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let database_path = self.parse_identifier(false)?; + let database_alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier(false)?) 
+ } else { + None + }; + + let attach_options = self.parse_attach_duckdb_database_options()?; + Ok(Statement::AttachDuckDBDatabase { + if_not_exists, + database, + database_path, + database_alias, + attach_options, + }) + } + + pub fn parse_detach_duckdb_database(&mut self) -> Result { + let database = self.parse_keyword(Keyword::DATABASE); + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let database_alias = self.parse_identifier(false)?; + Ok(Statement::DetachDuckDBDatabase { + if_exists, + database, + database_alias, + }) + } + pub fn parse_attach_database(&mut self) -> Result { let database = self.parse_keyword(Keyword::DATABASE); let database_file_name = self.parse_expr()?; @@ -3075,6 +3150,8 @@ impl<'a> Parser<'a> { let temporary = self .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) .is_some(); + let persistent = dialect_of!(self is DuckDbDialect) + && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); if self.parse_keyword(Keyword::TABLE) { self.parse_create_table(or_replace, temporary, global, transient) } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { @@ -3086,6 +3163,8 @@ impl<'a> Parser<'a> { self.parse_create_function(or_replace, temporary) } else if self.parse_keyword(Keyword::MACRO) { self.parse_create_macro(or_replace, temporary) + } else if self.parse_keyword(Keyword::SECRET) { + self.parse_create_secret(or_replace, temporary, persistent) } else if or_replace { self.expected( "[EXTERNAL] TABLE or [MATERIALIZED] VIEW or FUNCTION after CREATE OR REPLACE", @@ -3116,6 +3195,65 @@ impl<'a> Parser<'a> { } } + /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. + pub fn parse_create_secret( + &mut self, + or_replace: bool, + temporary: bool, + persistent: bool, + ) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + + let mut storage_specifier = None; + let mut name = None; + if self.peek_token() != Token::LParen { + if self.parse_keyword(Keyword::IN) { + storage_specifier = self.parse_identifier(false).ok() + } else { + name = self.parse_identifier(false).ok(); + } + + // Storage specifier may follow the name + if storage_specifier.is_none() + && self.peek_token() != Token::LParen + && self.parse_keyword(Keyword::IN) + { + storage_specifier = self.parse_identifier(false).ok(); + } + } + + self.expect_token(&Token::LParen)?; + self.expect_keyword(Keyword::TYPE)?; + let secret_type = self.parse_identifier(false)?; + + let mut options = Vec::new(); + if self.consume_token(&Token::Comma) { + options.append(&mut self.parse_comma_separated(|p| { + let key = p.parse_identifier(false)?; + let value = p.parse_identifier(false)?; + Ok(SecretOption { key, value }) + })?); + } + self.expect_token(&Token::RParen)?; + + let temp = match (temporary, persistent) { + (true, false) => Some(true), + (false, true) => Some(false), + (false, false) => None, + _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, + }; + + Ok(Statement::CreateSecret { + or_replace, + temporary: temp, + if_not_exists, + name, + storage_specifier, + secret_type, + options, + }) + } + /// Parse a CACHE TABLE statement pub fn parse_cache_table(&mut self) -> Result { let (mut table_flag, mut options, mut has_as, mut query) = (None, vec![], false, None); @@ -3889,8 +4027,10 @@ impl<'a> Parser<'a> { pub fn parse_drop(&mut self) -> Result { // MySQL dialect supports `TEMPORARY` - let temporary = dialect_of!(self is MySqlDialect 
| GenericDialect) + let temporary = dialect_of!(self is MySqlDialect | GenericDialect | DuckDbDialect) && self.parse_keyword(Keyword::TEMPORARY); + let persistent = dialect_of!(self is DuckDbDialect) + && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); let object_type = if self.parse_keyword(Keyword::TABLE) { ObjectType::Table @@ -3908,6 +4048,8 @@ impl<'a> Parser<'a> { ObjectType::Stage } else if self.parse_keyword(Keyword::FUNCTION) { return self.parse_drop_function(); + } else if self.parse_keyword(Keyword::SECRET) { + return self.parse_drop_secret(temporary, persistent); } else { return self.expected( "TABLE, VIEW, INDEX, ROLE, SCHEMA, FUNCTION, STAGE or SEQUENCE after DROP", @@ -3980,6 +4122,34 @@ impl<'a> Parser<'a> { Ok(DropFunctionDesc { name, args }) } + /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. + fn parse_drop_secret( + &mut self, + temporary: bool, + persistent: bool, + ) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + let storage_specifier = if self.parse_keyword(Keyword::FROM) { + self.parse_identifier(false).ok() + } else { + None + }; + let temp = match (temporary, persistent) { + (true, false) => Some(true), + (false, true) => Some(false), + (false, false) => None, + _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, + }; + + Ok(Statement::DropSecret { + if_exists, + temporary: temp, + name, + storage_specifier, + }) + } + /// Parse a `DECLARE` statement. /// /// ```sql diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index e41109d95..d6a6b7d4b 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -334,6 +334,147 @@ fn test_duckdb_struct_literal() { ); } +#[test] +fn test_create_secret() { + let sql = r#"CREATE OR REPLACE PERSISTENT SECRET IF NOT EXISTS name IN storage ( TYPE type, key1 value1, key2 value2 )"#; + let stmt = duckdb().verified_stmt(sql); + assert_eq!( + Statement::CreateSecret { + or_replace: true, + temporary: Some(false), + if_not_exists: true, + name: Some(Ident::new("name")), + storage_specifier: Some(Ident::new("storage")), + secret_type: Ident::new("type"), + options: vec![ + SecretOption { + key: Ident::new("key1"), + value: Ident::new("value1"), + }, + SecretOption { + key: Ident::new("key2"), + value: Ident::new("value2"), + } + ] + }, + stmt + ); +} + +#[test] +fn test_create_secret_simple() { + let sql = r#"CREATE SECRET ( TYPE type )"#; + let stmt = duckdb().verified_stmt(sql); + assert_eq!( + Statement::CreateSecret { + or_replace: false, + temporary: None, + if_not_exists: false, + name: None, + storage_specifier: None, + secret_type: Ident::new("type"), + options: vec![] + }, + stmt + ); +} + +#[test] +fn test_drop_secret() { + let sql = r#"DROP PERSISTENT SECRET IF EXISTS secret FROM storage"#; + let stmt = duckdb().verified_stmt(sql); + assert_eq!( + Statement::DropSecret { + if_exists: true, + temporary: Some(false), + name: Ident::new("secret"), + storage_specifier: Some(Ident::new("storage")) + }, + stmt + ); +} + +#[test] +fn test_drop_secret_simple() { + let sql = r#"DROP SECRET secret"#; + let stmt = duckdb().verified_stmt(sql); + assert_eq!( + Statement::DropSecret { + if_exists: false, + temporary: None, + name: Ident::new("secret"), + storage_specifier: None + }, + stmt + ); +} + +#[test] +fn test_attach_database() { + let sql = r#"ATTACH DATABASE IF NOT EXISTS 'sqlite_file.db' AS sqlite_db (READ_ONLY false, TYPE SQLITE)"#; + let 
stmt = duckdb().verified_stmt(sql); + assert_eq!( + Statement::AttachDuckDBDatabase { + if_not_exists: true, + database: true, + database_path: Ident::with_quote('\'', "sqlite_file.db"), + database_alias: Some(Ident::new("sqlite_db")), + attach_options: vec![ + AttachDuckDBDatabaseOption::ReadOnly(Some(false)), + AttachDuckDBDatabaseOption::Type(Ident::new("SQLITE")), + ] + }, + stmt + ); +} + +#[test] +fn test_attach_database_simple() { + let sql = r#"ATTACH 'postgres://user.name:pass-word@some.url.com:5432/postgres'"#; + let stmt = duckdb().verified_stmt(sql); + assert_eq!( + Statement::AttachDuckDBDatabase { + if_not_exists: false, + database: false, + database_path: Ident::with_quote( + '\'', + "postgres://user.name:pass-word@some.url.com:5432/postgres" + ), + database_alias: None, + attach_options: vec![] + }, + stmt + ); +} + +#[test] +fn test_detach_database() { + let sql = r#"DETACH DATABASE IF EXISTS db_name"#; + let stmt = duckdb().verified_stmt(sql); + assert_eq!( + Statement::DetachDuckDBDatabase { + if_exists: true, + database: true, + database_alias: Ident::new("db_name"), + }, + stmt + ); +} + +#[test] +fn test_detach_database_simple() { + let sql = r#"DETACH db_name"#; + let stmt = duckdb().verified_stmt(sql); + assert_eq!( + Statement::DetachDuckDBDatabase { + if_exists: false, + database: false, + database_alias: Ident::new("db_name"), + }, + stmt + ); +} + #[test] fn test_duckdb_named_argument_function_with_assignment_operator() { let sql = "SELECT FUN(a := '1', b := '2') FROM foo"; From 8dd213cff28e36b98f21dca09dbb94a10b5af22b Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Tue, 9 Apr 2024 23:05:31 +0200 Subject: [PATCH 19/42] BigQuery: support unquoted hyphen in table/view declaration (#1178) --- src/parser/mod.rs | 10 +++++---- tests/sqlparser_bigquery.rs | 45 +++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6fce36844..5daf861f4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3749,7 +3749,8 @@ impl<'a> Parser<'a> { && self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. 
- let name = self.parse_object_name(false)?; + let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); + let name = self.parse_object_name(allow_unquoted_hyphen)?; let columns = self.parse_view_columns()?; let mut options = CreateTableOptions::None; let with_options = self.parse_options(Keyword::WITH)?; @@ -4736,8 +4737,9 @@ impl<'a> Parser<'a> { global: Option, transient: bool, ) -> Result { + let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let table_name = self.parse_object_name(false)?; + let table_name = self.parse_object_name(allow_unquoted_hyphen)?; // Clickhouse has `ON CLUSTER 'cluster'` syntax for DDLs let on_cluster = if self.parse_keywords(&[Keyword::ON, Keyword::CLUSTER]) { @@ -4752,13 +4754,13 @@ impl<'a> Parser<'a> { }; let like = if self.parse_keyword(Keyword::LIKE) || self.parse_keyword(Keyword::ILIKE) { - self.parse_object_name(false).ok() + self.parse_object_name(allow_unquoted_hyphen).ok() } else { None }; let clone = if self.parse_keyword(Keyword::CLONE) { - self.parse_object_name(false).ok() + self.parse_object_name(allow_unquoted_hyphen).ok() } else { None }; diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 7bc715a0c..d9081461b 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -206,6 +206,51 @@ fn parse_create_view_if_not_exists() { } } +#[test] +fn parse_create_view_with_unquoted_hyphen() { + let sql = "CREATE VIEW IF NOT EXISTS my-pro-ject.mydataset.myview AS SELECT 1"; + match bigquery().verified_stmt(sql) { + Statement::CreateView { + name, + query, + if_not_exists, + .. + } => { + assert_eq!("my-pro-ject.mydataset.myview", name.to_string()); + assert_eq!("SELECT 1", query.to_string()); + assert!(if_not_exists); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_table_with_unquoted_hyphen() { + let sql = "CREATE TABLE my-pro-ject.mydataset.mytable (x INT64)"; + match bigquery().verified_stmt(sql) { + Statement::CreateTable { name, columns, .. 
} => { + assert_eq!( + name, + ObjectName(vec![ + "my-pro-ject".into(), + "mydataset".into(), + "mytable".into() + ]) + ); + assert_eq!( + vec![ColumnDef { + name: Ident::new("x"), + data_type: DataType::Int64, + collation: None, + options: vec![] + },], + columns + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_table_with_options() { let sql = concat!( From 127be973692153a141eeaf4c8d21b418ba2ffb9a Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Tue, 9 Apr 2024 23:16:03 +0200 Subject: [PATCH 20/42] Support more `DateTimeField` variants (#1191) --- src/ast/mod.rs | 6 +- src/ast/value.rs | 114 +++++++++++++++++++++++------------ src/parser/mod.rs | 19 +++++- tests/sqlparser_bigquery.rs | 13 ++++ tests/sqlparser_common.rs | 10 ++- tests/sqlparser_snowflake.rs | 13 ++++ 6 files changed, 129 insertions(+), 46 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 8fc696baa..c5386f878 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -229,7 +229,7 @@ impl fmt::Display for Interval { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let value = self.value.as_ref(); match ( - self.leading_field, + &self.leading_field, self.leading_precision, self.fractional_seconds_precision, ) { @@ -248,13 +248,13 @@ impl fmt::Display for Interval { } _ => { write!(f, "INTERVAL {value}")?; - if let Some(leading_field) = self.leading_field { + if let Some(leading_field) = &self.leading_field { write!(f, " {leading_field}")?; } if let Some(leading_precision) = self.leading_precision { write!(f, " ({leading_precision})")?; } - if let Some(last_field) = self.last_field { + if let Some(last_field) = &self.last_field { write!(f, " TO {last_field}")?; } if let Some(fractional_seconds_precision) = self.fractional_seconds_precision { diff --git a/src/ast/value.rs b/src/ast/value.rs index a9c74d4a8..d596cd648 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -12,6 +12,13 @@ #[cfg(not(feature = "std"))] use alloc::string::String; + +#[cfg(not(feature = "std"))] +use alloc::format; + +#[cfg(not(feature = "std"))] +use alloc::string::ToString; + use core::fmt; #[cfg(feature = "bigdecimal")] @@ -20,6 +27,7 @@ use bigdecimal::BigDecimal; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use crate::ast::Ident; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; @@ -109,17 +117,25 @@ impl fmt::Display for DollarQuotedString { } } -#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum DateTimeField { Year, Month, - Week, + /// Week optionally followed by a WEEKDAY. + /// + /// ```sql + /// WEEK(MONDAY) + /// ``` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#extract) + Week(Option), Day, DayOfWeek, DayOfYear, Date, + Datetime, Hour, Minute, Second, @@ -148,47 +164,67 @@ pub enum DateTimeField { TimezoneMinute, TimezoneRegion, NoDateTime, + /// Arbitrary abbreviation or custom date-time part. 
+ /// + /// ```sql + /// EXTRACT(q FROM CURRENT_TIMESTAMP) + /// ``` + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/functions-date-time#supported-date-and-time-parts) + Custom(Ident), } impl fmt::Display for DateTimeField { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str(match self { - DateTimeField::Year => "YEAR", - DateTimeField::Month => "MONTH", - DateTimeField::Week => "WEEK", - DateTimeField::Day => "DAY", - DateTimeField::DayOfWeek => "DAYOFWEEK", - DateTimeField::DayOfYear => "DAYOFYEAR", - DateTimeField::Date => "DATE", - DateTimeField::Hour => "HOUR", - DateTimeField::Minute => "MINUTE", - DateTimeField::Second => "SECOND", - DateTimeField::Century => "CENTURY", - DateTimeField::Decade => "DECADE", - DateTimeField::Dow => "DOW", - DateTimeField::Doy => "DOY", - DateTimeField::Epoch => "EPOCH", - DateTimeField::Isodow => "ISODOW", - DateTimeField::Isoyear => "ISOYEAR", - DateTimeField::IsoWeek => "ISOWEEK", - DateTimeField::Julian => "JULIAN", - DateTimeField::Microsecond => "MICROSECOND", - DateTimeField::Microseconds => "MICROSECONDS", - DateTimeField::Millenium => "MILLENIUM", - DateTimeField::Millennium => "MILLENNIUM", - DateTimeField::Millisecond => "MILLISECOND", - DateTimeField::Milliseconds => "MILLISECONDS", - DateTimeField::Nanosecond => "NANOSECOND", - DateTimeField::Nanoseconds => "NANOSECONDS", - DateTimeField::Quarter => "QUARTER", - DateTimeField::Time => "TIME", - DateTimeField::Timezone => "TIMEZONE", - DateTimeField::TimezoneAbbr => "TIMEZONE_ABBR", - DateTimeField::TimezoneHour => "TIMEZONE_HOUR", - DateTimeField::TimezoneMinute => "TIMEZONE_MINUTE", - DateTimeField::TimezoneRegion => "TIMEZONE_REGION", - DateTimeField::NoDateTime => "NODATETIME", - }) + f.write_str( + match self { + DateTimeField::Year => "YEAR".to_string(), + DateTimeField::Month => "MONTH".to_string(), + DateTimeField::Week(week_day) => { + format!( + "WEEK{}", + week_day + .as_ref() + .map(|w| format!("({w})")) + .unwrap_or_default() + ) + } + DateTimeField::Day => "DAY".to_string(), + DateTimeField::DayOfWeek => "DAYOFWEEK".to_string(), + DateTimeField::DayOfYear => "DAYOFYEAR".to_string(), + DateTimeField::Date => "DATE".to_string(), + DateTimeField::Datetime => "DATETIME".to_string(), + DateTimeField::Hour => "HOUR".to_string(), + DateTimeField::Minute => "MINUTE".to_string(), + DateTimeField::Second => "SECOND".to_string(), + DateTimeField::Century => "CENTURY".to_string(), + DateTimeField::Decade => "DECADE".to_string(), + DateTimeField::Dow => "DOW".to_string(), + DateTimeField::Doy => "DOY".to_string(), + DateTimeField::Epoch => "EPOCH".to_string(), + DateTimeField::Isodow => "ISODOW".to_string(), + DateTimeField::Isoyear => "ISOYEAR".to_string(), + DateTimeField::IsoWeek => "ISOWEEK".to_string(), + DateTimeField::Julian => "JULIAN".to_string(), + DateTimeField::Microsecond => "MICROSECOND".to_string(), + DateTimeField::Microseconds => "MICROSECONDS".to_string(), + DateTimeField::Millenium => "MILLENIUM".to_string(), + DateTimeField::Millennium => "MILLENNIUM".to_string(), + DateTimeField::Millisecond => "MILLISECOND".to_string(), + DateTimeField::Milliseconds => "MILLISECONDS".to_string(), + DateTimeField::Nanosecond => "NANOSECOND".to_string(), + DateTimeField::Nanoseconds => "NANOSECONDS".to_string(), + DateTimeField::Quarter => "QUARTER".to_string(), + DateTimeField::Time => "TIME".to_string(), + DateTimeField::Timezone => "TIMEZONE".to_string(), + DateTimeField::TimezoneAbbr => "TIMEZONE_ABBR".to_string(), + DateTimeField::TimezoneHour => 
"TIMEZONE_HOUR".to_string(), + DateTimeField::TimezoneMinute => "TIMEZONE_MINUTE".to_string(), + DateTimeField::TimezoneRegion => "TIMEZONE_REGION".to_string(), + DateTimeField::NoDateTime => "NODATETIME".to_string(), + DateTimeField::Custom(custom) => format!("{custom}"), + } + .as_str(), + ) } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5daf861f4..7cdf07bd7 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1857,11 +1857,23 @@ impl<'a> Parser<'a> { Token::Word(w) => match w.keyword { Keyword::YEAR => Ok(DateTimeField::Year), Keyword::MONTH => Ok(DateTimeField::Month), - Keyword::WEEK => Ok(DateTimeField::Week), + Keyword::WEEK => { + let week_day = if dialect_of!(self is BigQueryDialect | GenericDialect) + && self.consume_token(&Token::LParen) + { + let week_day = self.parse_identifier(false)?; + self.expect_token(&Token::RParen)?; + Some(week_day) + } else { + None + }; + Ok(DateTimeField::Week(week_day)) + } Keyword::DAY => Ok(DateTimeField::Day), Keyword::DAYOFWEEK => Ok(DateTimeField::DayOfWeek), Keyword::DAYOFYEAR => Ok(DateTimeField::DayOfYear), Keyword::DATE => Ok(DateTimeField::Date), + Keyword::DATETIME => Ok(DateTimeField::Datetime), Keyword::HOUR => Ok(DateTimeField::Hour), Keyword::MINUTE => Ok(DateTimeField::Minute), Keyword::SECOND => Ok(DateTimeField::Second), @@ -1889,6 +1901,11 @@ impl<'a> Parser<'a> { Keyword::TIMEZONE_HOUR => Ok(DateTimeField::TimezoneHour), Keyword::TIMEZONE_MINUTE => Ok(DateTimeField::TimezoneMinute), Keyword::TIMEZONE_REGION => Ok(DateTimeField::TimezoneRegion), + _ if dialect_of!(self is SnowflakeDialect | GenericDialect) => { + self.prev_token(); + let custom = self.parse_identifier(false)?; + Ok(DateTimeField::Custom(custom)) + } _ => self.expected("date/time field", next_token), }, _ => self.expected("date/time field", next_token), diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index d9081461b..391f97517 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1462,6 +1462,19 @@ fn test_bigquery_trim() { ); } +#[test] +fn parse_extract_weekday() { + let sql = "SELECT EXTRACT(WEEK(MONDAY) FROM d)"; + let select = bigquery_and_generic().verified_only_select(sql); + assert_eq!( + &Expr::Extract { + field: DateTimeField::Week(Some(Ident::new("MONDAY"))), + expr: Box::new(Expr::Identifier(Ident::new("d"))), + }, + expr_from_projection(only(&select.projection)), + ); +} + #[test] fn test_select_as_struct() { bigquery().verified_only_select("SELECT * FROM (SELECT AS VALUE STRUCT(123 AS a, false AS b))"); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c8551e1fe..c67dcb5b6 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2271,6 +2271,7 @@ fn parse_extract() { verified_stmt("SELECT EXTRACT(DAYOFWEEK FROM d)"); verified_stmt("SELECT EXTRACT(DAYOFYEAR FROM d)"); verified_stmt("SELECT EXTRACT(DATE FROM d)"); + verified_stmt("SELECT EXTRACT(DATETIME FROM d)"); verified_stmt("SELECT EXTRACT(HOUR FROM d)"); verified_stmt("SELECT EXTRACT(MINUTE FROM d)"); verified_stmt("SELECT EXTRACT(SECOND FROM d)"); @@ -2300,7 +2301,8 @@ fn parse_extract() { verified_stmt("SELECT EXTRACT(TIMEZONE_REGION FROM d)"); verified_stmt("SELECT EXTRACT(TIME FROM d)"); - let res = parse_sql_statements("SELECT EXTRACT(JIFFY FROM d)"); + let dialects = all_dialects_except(|d| d.is::() || d.is::()); + let res = dialects.parse_sql_statements("SELECT EXTRACT(JIFFY FROM d)"); assert_eq!( ParserError::ParserError("Expected date/time field, found: JIFFY".to_string()), 
res.unwrap_err() @@ -2338,7 +2340,8 @@ fn parse_ceil_datetime() { verified_stmt("SELECT CEIL(d TO SECOND) FROM df"); verified_stmt("SELECT CEIL(d TO MILLISECOND) FROM df"); - let res = parse_sql_statements("SELECT CEIL(d TO JIFFY) FROM df"); + let dialects = all_dialects_except(|d| d.is::() || d.is::()); + let res = dialects.parse_sql_statements("SELECT CEIL(d TO JIFFY) FROM df"); assert_eq!( ParserError::ParserError("Expected date/time field, found: JIFFY".to_string()), res.unwrap_err() @@ -2364,7 +2367,8 @@ fn parse_floor_datetime() { verified_stmt("SELECT FLOOR(d TO SECOND) FROM df"); verified_stmt("SELECT FLOOR(d TO MILLISECOND) FROM df"); - let res = parse_sql_statements("SELECT FLOOR(d TO JIFFY) FROM df"); + let dialects = all_dialects_except(|d| d.is::() || d.is::()); + let res = dialects.parse_sql_statements("SELECT FLOOR(d TO JIFFY) FROM df"); assert_eq!( ParserError::ParserError("Expected date/time field, found: JIFFY".to_string()), res.unwrap_err() diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 880129f82..5c13457b6 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1472,6 +1472,19 @@ fn parse_top() { ); } +#[test] +fn parse_extract_custom_part() { + let sql = "SELECT EXTRACT(eod FROM d)"; + let select = snowflake_and_generic().verified_only_select(sql); + assert_eq!( + &Expr::Extract { + field: DateTimeField::Custom(Ident::new("eod")), + expr: Box::new(Expr::Identifier(Ident::new("d"))), + }, + expr_from_projection(only(&select.projection)), + ); +} + #[test] fn parse_comma_outer_join() { // compound identifiers From eda86d8ed79978872dcc5ac729ab1b95da3d6ed4 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Tue, 9 Apr 2024 23:21:22 +0200 Subject: [PATCH 21/42] Add support for arbitrary map access expr (#1179) --- src/ast/mod.rs | 46 ++++++++++++++++----- src/parser/mod.rs | 77 ++++++++++++++++------------------- tests/sqlparser_bigquery.rs | 71 ++++++++++++++++++-------------- tests/sqlparser_clickhouse.rs | 74 +++++++++++++++++---------------- tests/sqlparser_common.rs | 42 +++++++++++++++++++ 5 files changed, 194 insertions(+), 116 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c5386f878..e02741aac 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -374,6 +374,40 @@ pub enum CastFormat { ValueAtTimeZone(Value, Value), } +/// Represents the syntax/style used in a map access. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MapAccessSyntax { + /// Access using bracket notation. `mymap[mykey]` + Bracket, + /// Access using period notation. `mymap.mykey` + Period, +} + +/// Expression used to access a value in a nested structure. +/// +/// Example: `SAFE_OFFSET(0)` in +/// ```sql +/// SELECT mymap[SAFE_OFFSET(0)]; +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MapAccessKey { + pub key: Expr, + pub syntax: MapAccessSyntax, +} + +impl fmt::Display for MapAccessKey { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.syntax { + MapAccessSyntax::Bracket => write!(f, "[{}]", self.key), + MapAccessSyntax::Period => write!(f, ".{}", self.key), + } + } +} + /// An SQL expression of any type. 
/// /// The parser does not distinguish between expressions of different types @@ -638,7 +672,7 @@ pub enum Expr { /// MapAccess { column: Box, - keys: Vec, + keys: Vec, }, /// Scalar function call e.g. `LEFT(foo, 5)` Function(Function), @@ -774,15 +808,7 @@ impl fmt::Display for Expr { match self { Expr::Identifier(s) => write!(f, "{s}"), Expr::MapAccess { column, keys } => { - write!(f, "{column}")?; - for k in keys { - match k { - k @ Expr::Value(Value::Number(_, _)) => write!(f, "[{k}]")?, - Expr::Value(Value::SingleQuotedString(s)) => write!(f, "[\"{s}\"]")?, - _ => write!(f, "[{k}]")?, - } - } - Ok(()) + write!(f, "{column}{}", display_separated(keys, "")) } Expr::Wildcard => f.write_str("*"), Expr::QualifiedWildcard(prefix) => write!(f, "{}.*", prefix), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7cdf07bd7..5bae7a133 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2608,23 +2608,43 @@ impl<'a> Parser<'a> { } pub fn parse_map_access(&mut self, expr: Expr) -> Result { - let key = self.parse_map_key()?; - let tok = self.consume_token(&Token::RBracket); - debug!("Tok: {}", tok); - let mut key_parts: Vec = vec![key]; - while self.consume_token(&Token::LBracket) { - let key = self.parse_map_key()?; - let tok = self.consume_token(&Token::RBracket); - debug!("Tok: {}", tok); - key_parts.push(key); - } - match expr { - e @ Expr::Identifier(_) | e @ Expr::CompoundIdentifier(_) => Ok(Expr::MapAccess { - column: Box::new(e), - keys: key_parts, - }), - _ => Ok(expr), + let key = self.parse_expr()?; + self.expect_token(&Token::RBracket)?; + + let mut keys = vec![MapAccessKey { + key, + syntax: MapAccessSyntax::Bracket, + }]; + loop { + let key = match self.peek_token().token { + Token::LBracket => { + self.next_token(); // consume `[` + let key = self.parse_expr()?; + self.expect_token(&Token::RBracket)?; + MapAccessKey { + key, + syntax: MapAccessSyntax::Bracket, + } + } + // Access on BigQuery nested and repeated expressions can + // mix notations in the same expression. + // https://cloud.google.com/bigquery/docs/nested-repeated#query_nested_and_repeated_columns + Token::Period if dialect_of!(self is BigQueryDialect) => { + self.next_token(); // consume `.` + MapAccessKey { + key: self.parse_expr()?, + syntax: MapAccessSyntax::Period, + } + } + _ => break, + }; + keys.push(key); } + + Ok(Expr::MapAccess { + column: Box::new(expr), + keys, + }) } /// Parses the parens following the `[ NOT ] IN` operator @@ -6329,31 +6349,6 @@ impl<'a> Parser<'a> { } } - /// Parse a map key string - pub fn parse_map_key(&mut self) -> Result { - let next_token = self.next_token(); - match next_token.token { - // handle bigquery offset subscript operator which overlaps with OFFSET operator - Token::Word(Word { value, keyword, .. }) - if (dialect_of!(self is BigQueryDialect) && keyword == Keyword::OFFSET) => - { - self.parse_function(ObjectName(vec![Ident::new(value)])) - } - Token::Word(Word { value, keyword, .. 
}) if (keyword == Keyword::NoKeyword) => { - if self.peek_token() == Token::LParen { - return self.parse_function(ObjectName(vec![Ident::new(value)])); - } - Ok(Expr::Value(Value::SingleQuotedString(value))) - } - Token::SingleQuotedString(s) => Ok(Expr::Value(Value::SingleQuotedString(s))), - #[cfg(not(feature = "bigdecimal"))] - Token::Number(s, _) => Ok(Expr::Value(Value::Number(s, false))), - #[cfg(feature = "bigdecimal")] - Token::Number(s, _) => Ok(Expr::Value(Value::Number(s.parse().unwrap(), false))), - _ => self.expected("literal string, number or function", next_token), - } - } - /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) pub fn parse_data_type(&mut self) -> Result { let (ty, trailing_bracket) = self.parse_data_type_helper()?; diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 391f97517..c8f1bb7c1 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1402,39 +1402,48 @@ fn bigquery_and_generic() -> TestedDialects { } #[test] -fn parse_map_access_offset() { - let sql = "SELECT d[offset(0)]"; - let _select = bigquery().verified_only_select(sql); - assert_eq!( - _select.projection[0], - SelectItem::UnnamedExpr(Expr::MapAccess { - column: Box::new(Expr::Identifier(Ident { - value: "d".to_string(), - quote_style: None, - })), - keys: vec![Expr::Function(Function { - name: ObjectName(vec!["offset".into()]), - args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - number("0") - ))),], - null_treatment: None, - filter: None, - over: None, - distinct: false, - special: false, - order_by: vec![], - })], - }) - ); +fn parse_map_access_expr() { + let sql = "users[-1][safe_offset(2)].a.b"; + let expr = bigquery().verified_expr(sql); - // test other operators - for sql in [ - "SELECT d[SAFE_OFFSET(0)]", - "SELECT d[ORDINAL(0)]", - "SELECT d[SAFE_ORDINAL(0)]", - ] { - bigquery().verified_only_select(sql); + fn map_access_key(key: Expr, syntax: MapAccessSyntax) -> MapAccessKey { + MapAccessKey { key, syntax } } + let expected = Expr::MapAccess { + column: Expr::Identifier(Ident::new("users")).into(), + keys: vec![ + map_access_key( + Expr::UnaryOp { + op: UnaryOperator::Minus, + expr: Expr::Value(number("1")).into(), + }, + MapAccessSyntax::Bracket, + ), + map_access_key( + Expr::Function(Function { + name: ObjectName(vec![Ident::new("safe_offset")]), + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + number("2"), + )))], + filter: None, + null_treatment: None, + over: None, + distinct: false, + special: false, + order_by: vec![], + }), + MapAccessSyntax::Bracket, + ), + map_access_key( + Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("b")]), + MapAccessSyntax::Period, + ), + ], + }; + assert_eq!(expr, expected); + + let sql = "SELECT myfunc()[-1].a[SAFE_OFFSET(2)].b"; + bigquery().verified_only_select(sql); } #[test] diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 1cbe34c5c..a3fcc612b 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -39,23 +39,26 @@ fn parse_map_access_expr() { value: "string_values".to_string(), quote_style: None, })), - keys: vec![Expr::Function(Function { - name: ObjectName(vec!["indexOf".into()]), - args: vec![ - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new( - "string_names" - )))), - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - Value::SingleQuotedString("endpoint".to_string()) - ))), - ], - null_treatment: None, - filter: None, - over: 
None, - distinct: false, - special: false, - order_by: vec![], - })], + keys: vec![MapAccessKey { + key: Expr::Function(Function { + name: ObjectName(vec!["indexOf".into()]), + args: vec![ + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + Ident::new("string_names") + ))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + Value::SingleQuotedString("endpoint".to_string()) + ))), + ], + null_treatment: None, + filter: None, + over: None, + distinct: false, + special: false, + order_by: vec![], + }), + syntax: MapAccessSyntax::Bracket + }], })], into: None, from: vec![TableWithJoins { @@ -80,23 +83,26 @@ fn parse_map_access_expr() { right: Box::new(BinaryOp { left: Box::new(MapAccess { column: Box::new(Identifier(Ident::new("string_value"))), - keys: vec![Expr::Function(Function { - name: ObjectName(vec![Ident::new("indexOf")]), - args: vec![ - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( - Ident::new("string_name") - ))), - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( - Value::SingleQuotedString("app".to_string()) - ))), - ], - null_treatment: None, - filter: None, - over: None, - distinct: false, - special: false, - order_by: vec![], - })], + keys: vec![MapAccessKey { + key: Expr::Function(Function { + name: ObjectName(vec![Ident::new("indexOf")]), + args: vec![ + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + Ident::new("string_name") + ))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + Value::SingleQuotedString("app".to_string()) + ))), + ], + null_treatment: None, + filter: None, + over: None, + distinct: false, + special: false, + order_by: vec![], + }), + syntax: MapAccessSyntax::Bracket + }], }), op: BinaryOperator::NotEq, right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c67dcb5b6..c94bd3779 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8643,3 +8643,45 @@ fn test_buffer_reuse() { p.parse_statements().unwrap(); let _ = p.into_tokens(); } + +#[test] +fn parse_map_access_expr() { + let sql = "users[-1][safe_offset(2)]"; + let dialects = TestedDialects { + dialects: vec![Box::new(BigQueryDialect {}), Box::new(ClickHouseDialect {})], + options: None, + }; + let expr = dialects.verified_expr(sql); + let expected = Expr::MapAccess { + column: Expr::Identifier(Ident::new("users")).into(), + keys: vec![ + MapAccessKey { + key: Expr::UnaryOp { + op: UnaryOperator::Minus, + expr: Expr::Value(number("1")).into(), + }, + syntax: MapAccessSyntax::Bracket, + }, + MapAccessKey { + key: Expr::Function(Function { + name: ObjectName(vec![Ident::new("safe_offset")]), + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + number("2"), + )))], + filter: None, + null_treatment: None, + over: None, + distinct: false, + special: false, + order_by: vec![], + }), + syntax: MapAccessSyntax::Bracket, + }, + ], + }; + assert_eq!(expr, expected); + + for sql in ["users[1]", "a[array_length(b) - 1 + 2][c + 3][d * 4]"] { + let _ = dialects.verified_expr(sql); + } +} From a0ed14ce023ec14162ad2406fe655b487b85aa84 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Apr 2024 17:23:22 -0400 Subject: [PATCH 22/42] Do not allocate in `impl Display for DateTimeField` (#1209) --- src/ast/value.rs | 97 +++++++++++++++++++++--------------------------- 1 file changed, 43 insertions(+), 54 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index d596cd648..84fdf00ae 100644 --- 
a/src/ast/value.rs +++ b/src/ast/value.rs @@ -13,12 +13,6 @@ #[cfg(not(feature = "std"))] use alloc::string::String; -#[cfg(not(feature = "std"))] -use alloc::format; - -#[cfg(not(feature = "std"))] -use alloc::string::ToString; - use core::fmt; #[cfg(feature = "bigdecimal")] @@ -175,56 +169,51 @@ pub enum DateTimeField { impl fmt::Display for DateTimeField { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str( - match self { - DateTimeField::Year => "YEAR".to_string(), - DateTimeField::Month => "MONTH".to_string(), - DateTimeField::Week(week_day) => { - format!( - "WEEK{}", - week_day - .as_ref() - .map(|w| format!("({w})")) - .unwrap_or_default() - ) + match self { + DateTimeField::Year => write!(f, "YEAR"), + DateTimeField::Month => write!(f, "MONTH"), + DateTimeField::Week(week_day) => { + write!(f, "WEEK")?; + if let Some(week_day) = week_day { + write!(f, "({week_day})")? } - DateTimeField::Day => "DAY".to_string(), - DateTimeField::DayOfWeek => "DAYOFWEEK".to_string(), - DateTimeField::DayOfYear => "DAYOFYEAR".to_string(), - DateTimeField::Date => "DATE".to_string(), - DateTimeField::Datetime => "DATETIME".to_string(), - DateTimeField::Hour => "HOUR".to_string(), - DateTimeField::Minute => "MINUTE".to_string(), - DateTimeField::Second => "SECOND".to_string(), - DateTimeField::Century => "CENTURY".to_string(), - DateTimeField::Decade => "DECADE".to_string(), - DateTimeField::Dow => "DOW".to_string(), - DateTimeField::Doy => "DOY".to_string(), - DateTimeField::Epoch => "EPOCH".to_string(), - DateTimeField::Isodow => "ISODOW".to_string(), - DateTimeField::Isoyear => "ISOYEAR".to_string(), - DateTimeField::IsoWeek => "ISOWEEK".to_string(), - DateTimeField::Julian => "JULIAN".to_string(), - DateTimeField::Microsecond => "MICROSECOND".to_string(), - DateTimeField::Microseconds => "MICROSECONDS".to_string(), - DateTimeField::Millenium => "MILLENIUM".to_string(), - DateTimeField::Millennium => "MILLENNIUM".to_string(), - DateTimeField::Millisecond => "MILLISECOND".to_string(), - DateTimeField::Milliseconds => "MILLISECONDS".to_string(), - DateTimeField::Nanosecond => "NANOSECOND".to_string(), - DateTimeField::Nanoseconds => "NANOSECONDS".to_string(), - DateTimeField::Quarter => "QUARTER".to_string(), - DateTimeField::Time => "TIME".to_string(), - DateTimeField::Timezone => "TIMEZONE".to_string(), - DateTimeField::TimezoneAbbr => "TIMEZONE_ABBR".to_string(), - DateTimeField::TimezoneHour => "TIMEZONE_HOUR".to_string(), - DateTimeField::TimezoneMinute => "TIMEZONE_MINUTE".to_string(), - DateTimeField::TimezoneRegion => "TIMEZONE_REGION".to_string(), - DateTimeField::NoDateTime => "NODATETIME".to_string(), - DateTimeField::Custom(custom) => format!("{custom}"), + Ok(()) } - .as_str(), - ) + DateTimeField::Day => write!(f, "DAY"), + DateTimeField::DayOfWeek => write!(f, "DAYOFWEEK"), + DateTimeField::DayOfYear => write!(f, "DAYOFYEAR"), + DateTimeField::Date => write!(f, "DATE"), + DateTimeField::Datetime => write!(f, "DATETIME"), + DateTimeField::Hour => write!(f, "HOUR"), + DateTimeField::Minute => write!(f, "MINUTE"), + DateTimeField::Second => write!(f, "SECOND"), + DateTimeField::Century => write!(f, "CENTURY"), + DateTimeField::Decade => write!(f, "DECADE"), + DateTimeField::Dow => write!(f, "DOW"), + DateTimeField::Doy => write!(f, "DOY"), + DateTimeField::Epoch => write!(f, "EPOCH"), + DateTimeField::Isodow => write!(f, "ISODOW"), + DateTimeField::Isoyear => write!(f, "ISOYEAR"), + DateTimeField::IsoWeek => write!(f, "ISOWEEK"), + DateTimeField::Julian => write!(f, 
"JULIAN"), + DateTimeField::Microsecond => write!(f, "MICROSECOND"), + DateTimeField::Microseconds => write!(f, "MICROSECONDS"), + DateTimeField::Millenium => write!(f, "MILLENIUM"), + DateTimeField::Millennium => write!(f, "MILLENNIUM"), + DateTimeField::Millisecond => write!(f, "MILLISECOND"), + DateTimeField::Milliseconds => write!(f, "MILLISECONDS"), + DateTimeField::Nanosecond => write!(f, "NANOSECOND"), + DateTimeField::Nanoseconds => write!(f, "NANOSECONDS"), + DateTimeField::Quarter => write!(f, "QUARTER"), + DateTimeField::Time => write!(f, "TIME"), + DateTimeField::Timezone => write!(f, "TIMEZONE"), + DateTimeField::TimezoneAbbr => write!(f, "TIMEZONE_ABBR"), + DateTimeField::TimezoneHour => write!(f, "TIMEZONE_HOUR"), + DateTimeField::TimezoneMinute => write!(f, "TIMEZONE_MINUTE"), + DateTimeField::TimezoneRegion => write!(f, "TIMEZONE_REGION"), + DateTimeField::NoDateTime => write!(f, "NODATETIME"), + DateTimeField::Custom(custom) => write!(f, "{custom}"), + } } } From e5c860213b1d1c73a0090f4ca16023cbdbf81b58 Mon Sep 17 00:00:00 2001 From: ZacJW Date: Fri, 12 Apr 2024 11:38:04 +0100 Subject: [PATCH 23/42] Fix dollar quoted string tokenizer (#1193) --- src/tokenizer.rs | 130 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 105 insertions(+), 25 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 1ceec705b..b239d990e 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1119,38 +1119,49 @@ impl<'a> Tokenizer<'a> { if let Some('$') = chars.peek() { chars.next(); - s.push_str(&peeking_take_while(chars, |ch| ch != '$')); - match chars.peek() { - Some('$') => { - chars.next(); - for c in value.chars() { - let next_char = chars.next(); - if Some(c) != next_char { - return self.tokenizer_error( - chars.location(), - format!( - "Unterminated dollar-quoted string at or near \"{value}\"" - ), - ); + 'searching_for_end: loop { + s.push_str(&peeking_take_while(chars, |ch| ch != '$')); + match chars.peek() { + Some('$') => { + chars.next(); + let mut maybe_s = String::from("$"); + for c in value.chars() { + if let Some(next_char) = chars.next() { + maybe_s.push(next_char); + if next_char != c { + // This doesn't match the dollar quote delimiter so this + // is not the end of the string. + s.push_str(&maybe_s); + continue 'searching_for_end; + } + } else { + return self.tokenizer_error( + chars.location(), + "Unterminated dollar-quoted, expected $", + ); + } + } + if chars.peek() == Some(&'$') { + chars.next(); + maybe_s.push('$'); + // maybe_s matches the end delimiter + break 'searching_for_end; + } else { + // This also doesn't match the dollar quote delimiter as there are + // more characters before the second dollar so this is not the end + // of the string. 
+ s.push_str(&maybe_s); + continue 'searching_for_end; } } - - if let Some('$') = chars.peek() { - chars.next(); - } else { + _ => { return self.tokenizer_error( chars.location(), - "Unterminated dollar-quoted string, expected $", - ); + "Unterminated dollar-quoted, expected $", + ) } } - _ => { - return self.tokenizer_error( - chars.location(), - "Unterminated dollar-quoted, expected $", - ); - } } } else { return Ok(Token::Placeholder(String::from("$") + &value)); @@ -1906,6 +1917,75 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_dollar_quoted_string_tagged() { + let sql = String::from( + "SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$", + ); + let dialect = GenericDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::DollarQuotedString(DollarQuotedString { + value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(), + tag: Some("tag".into()), + }), + ]; + compare(expected, tokens); + } + + #[test] + fn tokenize_dollar_quoted_string_tagged_unterminated() { + let sql = String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$different tag$"); + let dialect = GenericDialect {}; + assert_eq!( + Tokenizer::new(&dialect, &sql).tokenize(), + Err(TokenizerError { + message: "Unterminated dollar-quoted, expected $".into(), + location: Location { + line: 1, + column: 91 + } + }) + ); + } + + #[test] + fn tokenize_dollar_quoted_string_untagged() { + let sql = + String::from("SELECT $$within dollar '$' quoted strings have $tags like this$ $$"); + let dialect = GenericDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::DollarQuotedString(DollarQuotedString { + value: "within dollar '$' quoted strings have $tags like this$ ".into(), + tag: None, + }), + ]; + compare(expected, tokens); + } + + #[test] + fn tokenize_dollar_quoted_string_untagged_unterminated() { + let sql = String::from( + "SELECT $$dollar '$' quoted strings have $tags like this$ or like this $different tag$", + ); + let dialect = GenericDialect {}; + assert_eq!( + Tokenizer::new(&dialect, &sql).tokenize(), + Err(TokenizerError { + message: "Unterminated dollar-quoted string".into(), + location: Location { + line: 1, + column: 86 + } + }) + ); + } + #[test] fn tokenize_right_arrow() { let sql = String::from("FUNCTION(key=>value)"); From acc5dd937622b60a1f47c3d1679df6ca94f1198b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 12 Apr 2024 06:52:11 -0400 Subject: [PATCH 24/42] CHANGELOG for 0.45.0 (#1213) --- CHANGELOG.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0c9b22db..ecd57703c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,37 @@ changes that break via addition as "Added". ## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. 
+## [0.45.0] 2024-04-12 + +### Added +* Support `DateTimeField` variants: `CUSTOM` and `WEEK(MONDAY)` (#1191) - Thanks @iffyio +* Support for arbitrary expr in `MapAccessSyntax` (#1179) - Thanks @iffyio +* Support unquoted hyphen in table/view declaration for BigQuery (#1178) - Thanks @iffyio +* Support `CREATE/DROP SECRET` for duckdb dialect (#1208) - Thanks @JichaoS +* Support MySQL `UNIQUE` table constraint (#1164) - Thanks @Nikita-str +* Support tailing commas on Snowflake. (#1205) - Thanks @yassun7010 +* Support `[FIRST | AFTER column_name]` in `ALTER TABLE` for MySQL (#1180) - Thanks @xring +* Support inline comment with hash syntax for BigQuery (#1192) - Thanks @iffyio +* Support named windows in OVER (window_definition) clause (#1166) - Thanks @Nikita-str +* Support PARALLEL ... and for ..ON NULL INPUT ... to CREATE FUNCTION` (#1202) - Thanks @dimfeld +* Support DuckDB functions named arguments with assignment operator (#1195) - Thanks @alamb +* Support DuckDB struct literal syntax (#1194) - Thanks @gstvg +* Support `$$` in generic dialect ... (#1185)- Thanks @milenkovicm +* Support row_alias and col_aliases in `INSERT` statement for MySQL and Generic dialects (#1136) - Thanks @emin100 + +### Fixed +* Fix dollar quoted string tokenizer (#1193) - Thanks @ZacJW +* Do not allocate in `impl Display` for `DateTimeField` (#1209) - Thanks @alamb +* Fix parse `COPY INTO` stage names without parens for SnowFlake (#1187) - Thanks @mobuchowski +* Solve stack overflow on RecursionLimitExceeded on debug builds (#1171) - Thanks @Nikita-str +* Fix parsing of equality binary operator in function argument (#1182) - Thanks @jmhain +* Fix some comments (#1184) - Thanks @sunxunle + +### Changed +* Cleanup `CREATE FUNCTION` tests (#1203) - Thanks @alamb +* Parse `SUBSTRING FROM` syntax in all dialects, reflect change in the AST (#1173) - Thanks @lovasoa +* Add identifier quote style to Dialect trait (#1170) - Thanks @backkem + ## [0.44.0] 2024-03-02 ### Added From 2f03fad3394549fc629ebefd1298ada9d3333831 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 12 Apr 2024 06:54:36 -0400 Subject: [PATCH 25/42] chore: Release sqlparser version 0.45.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ed3d88b9f..3c5d4651c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.44.0" +version = "0.45.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 9db20e293f8eac2d8029146a62809ce5069d1034 Mon Sep 17 00:00:00 2001 From: Hiranmaya Gundu Date: Sun, 21 Apr 2024 05:20:41 -0700 Subject: [PATCH 26/42] fix: have wildcard replace work in duckdb and snowflake syntax (#1226) --- src/parser/mod.rs | 2 +- tests/sqlparser_bigquery.rs | 43 ------------------- tests/sqlparser_clickhouse.rs | 5 --- tests/sqlparser_common.rs | 78 +++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 49 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5bae7a133..de96625be 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9059,7 +9059,7 @@ impl<'a> Parser<'a> { None }; - let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect) + let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect) { 
self.parse_optional_select_item_replace()? } else { diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index c8f1bb7c1..43e6a84b7 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1280,49 +1280,6 @@ fn test_select_wildcard_with_except() { ); } -#[test] -fn test_select_wildcard_with_replace() { - let select = bigquery_and_generic() - .verified_only_select(r#"SELECT * REPLACE ('widget' AS item_name) FROM orders"#); - let expected = SelectItem::Wildcard(WildcardAdditionalOptions { - opt_replace: Some(ReplaceSelectItem { - items: vec![Box::new(ReplaceSelectElement { - expr: Expr::Value(Value::SingleQuotedString("widget".to_owned())), - column_name: Ident::new("item_name"), - as_keyword: true, - })], - }), - ..Default::default() - }); - assert_eq!(expected, select.projection[0]); - - let select = bigquery_and_generic().verified_only_select( - r#"SELECT * REPLACE (quantity / 2 AS quantity, 3 AS order_id) FROM orders"#, - ); - let expected = SelectItem::Wildcard(WildcardAdditionalOptions { - opt_replace: Some(ReplaceSelectItem { - items: vec![ - Box::new(ReplaceSelectElement { - expr: Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("quantity"))), - op: BinaryOperator::Divide, - right: Box::new(Expr::Value(number("2"))), - }, - column_name: Ident::new("quantity"), - as_keyword: true, - }), - Box::new(ReplaceSelectElement { - expr: Expr::Value(number("3")), - column_name: Ident::new("order_id"), - as_keyword: true, - }), - ], - }), - ..Default::default() - }); - assert_eq!(expected, select.projection[0]); -} - #[test] fn parse_big_query_declare() { for (sql, expected_names, expected_data_type, expected_assigned_expr) in [ diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index a3fcc612b..22396d064 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -382,11 +382,6 @@ fn parse_select_star_except_no_parens() { ); } -#[test] -fn parse_select_star_replace() { - clickhouse().verified_stmt("SELECT * REPLACE (i + 1 AS i) FROM columns_transformers"); -} - fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c94bd3779..6c95b6c56 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8685,3 +8685,81 @@ fn parse_map_access_expr() { let _ = dialects.verified_expr(sql); } } + +#[test] +fn test_select_wildcard_with_replace() { + let sql = r#"SELECT * REPLACE (lower(city) AS city) FROM addresses"#; + let dialects = TestedDialects { + dialects: vec![ + Box::new(GenericDialect {}), + Box::new(BigQueryDialect {}), + Box::new(ClickHouseDialect {}), + Box::new(SnowflakeDialect {}), + Box::new(DuckDbDialect {}), + ], + options: None, + }; + let select = dialects.verified_only_select(sql); + let expected = SelectItem::Wildcard(WildcardAdditionalOptions { + opt_replace: Some(ReplaceSelectItem { + items: vec![Box::new(ReplaceSelectElement { + expr: Expr::Function(Function { + name: ObjectName(vec![Ident::new("lower")]), + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("city")), + ))], + filter: None, + null_treatment: None, + over: None, + distinct: false, + special: false, + order_by: vec![], + }), + column_name: Ident::new("city"), + as_keyword: true, + })], + }), + ..Default::default() + }); + assert_eq!(expected, select.projection[0]); + + let select = + dialects.verified_only_select(r#"SELECT * REPLACE ('widget' AS item_name) 
FROM orders"#); + let expected = SelectItem::Wildcard(WildcardAdditionalOptions { + opt_replace: Some(ReplaceSelectItem { + items: vec![Box::new(ReplaceSelectElement { + expr: Expr::Value(Value::SingleQuotedString("widget".to_owned())), + column_name: Ident::new("item_name"), + as_keyword: true, + })], + }), + ..Default::default() + }); + assert_eq!(expected, select.projection[0]); + + let select = dialects.verified_only_select( + r#"SELECT * REPLACE (quantity / 2 AS quantity, 3 AS order_id) FROM orders"#, + ); + let expected = SelectItem::Wildcard(WildcardAdditionalOptions { + opt_replace: Some(ReplaceSelectItem { + items: vec![ + Box::new(ReplaceSelectElement { + expr: Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("quantity"))), + op: BinaryOperator::Divide, + right: Box::new(Expr::Value(number("2"))), + }, + column_name: Ident::new("quantity"), + as_keyword: true, + }), + Box::new(ReplaceSelectElement { + expr: Expr::Value(number("3")), + column_name: Ident::new("order_id"), + as_keyword: true, + }), + ], + }), + ..Default::default() + }); + assert_eq!(expected, select.projection[0]); +} From d1f67bdc4731b22f780280adf9203ccc48e87ed7 Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Sun, 21 Apr 2024 05:21:58 -0700 Subject: [PATCH 27/42] Preserve double colon casts (and simplify cast representations) (#1221) --- src/ast/mod.rs | 90 ++++++++++++++++++------------------ src/parser/mod.rs | 47 +++++-------------- tests/sqlparser_common.rs | 13 +++++- tests/sqlparser_postgres.rs | 14 +++--- tests/sqlparser_snowflake.rs | 3 +- 5 files changed, 79 insertions(+), 88 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e02741aac..17b1819b6 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -408,6 +408,26 @@ impl fmt::Display for MapAccessKey { } } +/// The syntax used for in a cast expression. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CastKind { + /// The standard SQL cast syntax, e.g. `CAST( as )` + Cast, + /// A cast that returns `NULL` on failure, e.g. `TRY_CAST( as )`. + /// + /// See . + /// See . + TryCast, + /// A cast that returns `NULL` on failure, bigQuery-specific , e.g. `SAFE_CAST( as )`. + /// + /// See . + SafeCast, + /// ` :: ` + DoubleColon, +} + /// An SQL expression of any type. /// /// The parser does not distinguish between expressions of different types @@ -546,25 +566,7 @@ pub enum Expr { }, /// `CAST` an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))` Cast { - expr: Box, - data_type: DataType, - // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery - // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax - format: Option, - }, - /// `TRY_CAST` an expression to a different data type e.g. `TRY_CAST(foo AS VARCHAR(123))` - // this differs from CAST in the choice of how to implement invalid conversions - TryCast { - expr: Box, - data_type: DataType, - // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery - // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax - format: Option, - }, - /// `SAFE_CAST` an expression to a different data type e.g. 
`SAFE_CAST(foo AS FLOAT64)` - // only available for BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#safe_casting - // this works the same as `TRY_CAST` - SafeCast { + kind: CastKind, expr: Box, data_type: DataType, // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery @@ -989,38 +991,36 @@ impl fmt::Display for Expr { write!(f, ")") } Expr::Cast { + kind, expr, data_type, format, - } => { - if let Some(format) = format { - write!(f, "CAST({expr} AS {data_type} FORMAT {format})") - } else { - write!(f, "CAST({expr} AS {data_type})") + } => match kind { + CastKind::Cast => { + if let Some(format) = format { + write!(f, "CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "CAST({expr} AS {data_type})") + } } - } - Expr::TryCast { - expr, - data_type, - format, - } => { - if let Some(format) = format { - write!(f, "TRY_CAST({expr} AS {data_type} FORMAT {format})") - } else { - write!(f, "TRY_CAST({expr} AS {data_type})") + CastKind::TryCast => { + if let Some(format) = format { + write!(f, "TRY_CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "TRY_CAST({expr} AS {data_type})") + } } - } - Expr::SafeCast { - expr, - data_type, - format, - } => { - if let Some(format) = format { - write!(f, "SAFE_CAST({expr} AS {data_type} FORMAT {format})") - } else { - write!(f, "SAFE_CAST({expr} AS {data_type})") + CastKind::SafeCast => { + if let Some(format) = format { + write!(f, "SAFE_CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "SAFE_CAST({expr} AS {data_type})") + } } - } + CastKind::DoubleColon => { + write!(f, "{expr}::{data_type}") + } + }, Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"), Expr::Ceil { expr, field } => { if field == &DateTimeField::NoDateTime { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index de96625be..a45c9da33 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1004,9 +1004,9 @@ impl<'a> Parser<'a> { } Keyword::CASE => self.parse_case_expr(), Keyword::CONVERT => self.parse_convert_expr(), - Keyword::CAST => self.parse_cast_expr(), - Keyword::TRY_CAST => self.parse_try_cast_expr(), - Keyword::SAFE_CAST => self.parse_safe_cast_expr(), + Keyword::CAST => self.parse_cast_expr(CastKind::Cast), + Keyword::TRY_CAST => self.parse_cast_expr(CastKind::TryCast), + Keyword::SAFE_CAST => self.parse_cast_expr(CastKind::SafeCast), Keyword::EXISTS => self.parse_exists_expr(false), Keyword::EXTRACT => self.parse_extract_expr(), Keyword::CEIL => self.parse_ceil_floor_expr(true), @@ -1491,7 +1491,7 @@ impl<'a> Parser<'a> { } /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` - pub fn parse_cast_expr(&mut self) -> Result { + pub fn parse_cast_expr(&mut self, kind: CastKind) -> Result { self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; @@ -1499,36 +1499,7 @@ impl<'a> Parser<'a> { let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::Cast { - expr: Box::new(expr), - data_type, - format, - }) - } - - /// Parse a SQL TRY_CAST function e.g. 
`TRY_CAST(expr AS FLOAT)` - pub fn parse_try_cast_expr(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_keyword(Keyword::AS)?; - let data_type = self.parse_data_type()?; - let format = self.parse_optional_cast_format()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::TryCast { - expr: Box::new(expr), - data_type, - format, - }) - } - - /// Parse a BigQuery SAFE_CAST function e.g. `SAFE_CAST(expr AS FLOAT64)` - pub fn parse_safe_cast_expr(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_keyword(Keyword::AS)?; - let data_type = self.parse_data_type()?; - let format = self.parse_optional_cast_format()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::SafeCast { + kind, expr: Box::new(expr), data_type, format, @@ -2528,7 +2499,12 @@ impl<'a> Parser<'a> { ), } } else if Token::DoubleColon == tok { - self.parse_pg_cast(expr) + Ok(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(expr), + data_type: self.parse_data_type()?, + format: None, + }) } else if Token::ExclamationMark == tok { // PostgreSQL factorial operation Ok(Expr::UnaryOp { @@ -2702,6 +2678,7 @@ impl<'a> Parser<'a> { /// Parse a postgresql casting style which is in the form of `expr::datatype` pub fn parse_pg_cast(&mut self, expr: Expr) -> Result { Ok(Expr::Cast { + kind: CastKind::DoubleColon, expr: Box::new(expr), data_type: self.parse_data_type()?, format: None, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 6c95b6c56..3aa84b923 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2107,6 +2107,7 @@ fn parse_cast() { let select = verified_only_select(sql); assert_eq!( &Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), format: None, @@ -2118,6 +2119,7 @@ fn parse_cast() { let select = verified_only_select(sql); assert_eq!( &Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::TinyInt(None), format: None, @@ -2145,6 +2147,7 @@ fn parse_cast() { let select = verified_only_select(sql); assert_eq!( &Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Nvarchar(Some(50)), format: None, @@ -2156,6 +2159,7 @@ fn parse_cast() { let select = verified_only_select(sql); assert_eq!( &Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(None), format: None, @@ -2167,6 +2171,7 @@ fn parse_cast() { let select = verified_only_select(sql); assert_eq!( &Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(Some(50)), format: None, @@ -2178,6 +2183,7 @@ fn parse_cast() { let select = verified_only_select(sql); assert_eq!( &Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Binary(Some(50)), format: None, @@ -2189,6 +2195,7 @@ fn parse_cast() { let select = verified_only_select(sql); assert_eq!( &Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Varbinary(Some(50)), format: None, @@ -2200,6 +2207,7 @@ fn parse_cast() { let select = verified_only_select(sql); assert_eq!( &Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(None), format: None, @@ -2211,6 +2219,7 @@ fn parse_cast() { let 
select = verified_only_select(sql); assert_eq!( &Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(Some(50)), format: None, @@ -2222,6 +2231,7 @@ fn parse_cast() { let select = verified_only_select(sql); assert_eq!( &Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("details"))), data_type: DataType::JSONB, format: None, @@ -2235,7 +2245,8 @@ fn parse_try_cast() { let sql = "SELECT TRY_CAST(id AS BIGINT) FROM customer"; let select = verified_only_select(sql); assert_eq!( - &Expr::TryCast { + &Expr::Cast { + kind: CastKind::TryCast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), format: None, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index ea5c9875b..38e32780d 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -328,6 +328,7 @@ fn parse_create_table_with_defaults() { location: None, .. } => { + use pretty_assertions::assert_eq; assert_eq!("public.customer", name.to_string()); assert_eq!( columns, @@ -422,9 +423,7 @@ fn parse_create_table_with_defaults() { options: vec![ ColumnOptionDef { name: None, - option: ColumnOption::Default( - pg().verified_expr("CAST(now() AS TEXT)") - ) + option: ColumnOption::Default(pg().verified_expr("now()::TEXT")) }, ColumnOptionDef { name: None, @@ -498,15 +497,15 @@ fn parse_create_table_from_pg_dump() { active int )"; pg().one_statement_parses_to(sql, "CREATE TABLE public.customer (\ - customer_id INTEGER DEFAULT nextval(CAST('public.customer_customer_id_seq' AS REGCLASS)) NOT NULL, \ + customer_id INTEGER DEFAULT nextval('public.customer_customer_id_seq'::REGCLASS) NOT NULL, \ store_id SMALLINT NOT NULL, \ first_name CHARACTER VARYING(45) NOT NULL, \ last_name CHARACTER VARYING(45) NOT NULL, \ info TEXT[], \ address_id SMALLINT NOT NULL, \ activebool BOOLEAN DEFAULT true NOT NULL, \ - create_date DATE DEFAULT CAST(now() AS DATE) NOT NULL, \ - create_date1 DATE DEFAULT CAST(CAST('now' AS TEXT) AS DATE) NOT NULL, \ + create_date DATE DEFAULT now()::DATE NOT NULL, \ + create_date1 DATE DEFAULT 'now'::TEXT::DATE NOT NULL, \ last_update TIMESTAMP WITHOUT TIME ZONE DEFAULT now(), \ release_year public.year, \ active INT\ @@ -1448,11 +1447,13 @@ fn parse_execute() { parameters: vec![], using: vec![ Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Value(Value::Number("1337".parse().unwrap(), false))), data_type: DataType::SmallInt(None), format: None }, Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Value(Value::Number("7331".parse().unwrap(), false))), data_type: DataType::SmallInt(None), format: None @@ -1908,6 +1909,7 @@ fn parse_array_index_expr() { assert_eq!( &Expr::ArrayIndex { obj: Box::new(Expr::Nested(Box::new(Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Array(Array { elem: vec![Expr::Array(Array { elem: vec![num[2].clone(), num[3].clone(),], diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 5c13457b6..b76e84ed4 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -168,6 +168,7 @@ fn parse_array() { let select = snowflake().verified_only_select(sql); assert_eq!( &Expr::Cast { + kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("a"))), data_type: DataType::Array(ArrayElemTypeDef::None), format: None, @@ -228,7 +229,7 @@ fn parse_json_using_colon() { select.projection[0] ); - snowflake().one_statement_parses_to("SELECT a:b::int FROM t", "SELECT CAST(a:b AS INT) FROM 
t"); + snowflake().verified_stmt("SELECT a:b::INT FROM t"); let sql = "SELECT a:start, a:end FROM t"; let select = snowflake().verified_only_select(sql); From 4604628c435ca2522a4cce5ec3a9e99f2c677311 Mon Sep 17 00:00:00 2001 From: Hiranmaya Gundu Date: Sun, 21 Apr 2024 05:22:08 -0700 Subject: [PATCH 28/42] feat: implement select * ilike for snowflake (#1228) --- src/ast/mod.rs | 4 ++-- src/ast/query.rs | 29 +++++++++++++++++++++++++++ src/parser/mod.rs | 28 +++++++++++++++++++++++++- tests/sqlparser_common.rs | 1 + tests/sqlparser_duckdb.rs | 2 ++ tests/sqlparser_snowflake.rs | 39 ++++++++++++++++++++++++++++++++++++ 6 files changed, 100 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 17b1819b6..31924e051 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -40,8 +40,8 @@ pub use self::ddl::{ pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ Cte, CteAsMaterialized, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, ForClause, - ForJson, ForXml, GroupByExpr, IdentWithAlias, Join, JoinConstraint, JoinOperator, - JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType, + ForJson, ForXml, GroupByExpr, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint, + JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType, NamedWindowDefinition, NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, diff --git a/src/ast/query.rs b/src/ast/query.rs index bf33cdee6..391ef51d8 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -474,6 +474,9 @@ impl fmt::Display for IdentWithAlias { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct WildcardAdditionalOptions { + /// `[ILIKE...]`. + /// Snowflake syntax: + pub opt_ilike: Option, /// `[EXCLUDE...]`. pub opt_exclude: Option, /// `[EXCEPT...]`. @@ -489,6 +492,9 @@ pub struct WildcardAdditionalOptions { impl fmt::Display for WildcardAdditionalOptions { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(ilike) = &self.opt_ilike { + write!(f, " {ilike}")?; + } if let Some(exclude) = &self.opt_exclude { write!(f, " {exclude}")?; } @@ -505,6 +511,29 @@ impl fmt::Display for WildcardAdditionalOptions { } } +/// Snowflake `ILIKE` information. +/// +/// # Syntax +/// ```plaintext +/// ILIKE +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct IlikeSelectItem { + pub pattern: String, +} + +impl fmt::Display for IlikeSelectItem { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "ILIKE '{}'", + value::escape_single_quote_string(&self.pattern) + )?; + Ok(()) + } +} /// Snowflake `EXCLUDE` information. /// /// # Syntax diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a45c9da33..6779dfd0f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9018,7 +9018,13 @@ impl<'a> Parser<'a> { pub fn parse_wildcard_additional_options( &mut self, ) -> Result { - let opt_exclude = if dialect_of!(self is GenericDialect | DuckDbDialect | SnowflakeDialect) + let opt_ilike = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + self.parse_optional_select_item_ilike()? 
+ } else { + None + }; + let opt_exclude = if opt_ilike.is_none() + && dialect_of!(self is GenericDialect | DuckDbDialect | SnowflakeDialect) { self.parse_optional_select_item_exclude()? } else { @@ -9044,6 +9050,7 @@ impl<'a> Parser<'a> { }; Ok(WildcardAdditionalOptions { + opt_ilike, opt_exclude, opt_except, opt_rename, @@ -9051,6 +9058,25 @@ impl<'a> Parser<'a> { }) } + /// Parse an [`Ilike`](IlikeSelectItem) information for wildcard select items. + /// + /// If it is not possible to parse it, will return an option. + pub fn parse_optional_select_item_ilike( + &mut self, + ) -> Result, ParserError> { + let opt_ilike = if self.parse_keyword(Keyword::ILIKE) { + let next_token = self.next_token(); + let pattern = match next_token.token { + Token::SingleQuotedString(s) => s, + _ => return self.expected("ilike pattern", next_token), + }; + Some(IlikeSelectItem { pattern }) + } else { + None + }; + Ok(opt_ilike) + } + /// Parse an [`Exclude`](ExcludeSelectItem) information for wildcard select items. /// /// If it is not possible to parse it, will return an option. diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3aa84b923..bbc0f0b2f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6622,6 +6622,7 @@ fn lateral_function() { distinct: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_ilike: None, opt_exclude: None, opt_except: None, opt_rename: None, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index d6a6b7d4b..fd420c8a3 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -148,6 +148,7 @@ fn test_select_union_by_name() { distinct: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_ilike: None, opt_exclude: None, opt_except: None, opt_rename: None, @@ -183,6 +184,7 @@ fn test_select_union_by_name() { distinct: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_ilike: None, opt_exclude: None, opt_except: None, opt_rename: None, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index b76e84ed4..56060a0d7 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1555,3 +1555,42 @@ fn parse_comma_outer_join() { fn test_sf_trailing_commas() { snowflake().verified_only_select_with_canonical("SELECT 1, 2, FROM t", "SELECT 1, 2 FROM t"); } + +#[test] +fn test_select_wildcard_with_ilike() { + let select = snowflake_and_generic().verified_only_select(r#"SELECT * ILIKE '%id%' FROM tbl"#); + let expected = SelectItem::Wildcard(WildcardAdditionalOptions { + opt_ilike: Some(IlikeSelectItem { + pattern: "%id%".to_owned(), + }), + ..Default::default() + }); + assert_eq!(expected, select.projection[0]); +} + +#[test] +fn test_select_wildcard_with_ilike_double_quote() { + let res = snowflake().parse_sql_statements(r#"SELECT * ILIKE "%id" FROM tbl"#); + assert_eq!( + res.unwrap_err().to_string(), + "sql parser error: Expected ilike pattern, found: \"%id\"" + ); +} + +#[test] +fn test_select_wildcard_with_ilike_number() { + let res = snowflake().parse_sql_statements(r#"SELECT * ILIKE 42 FROM tbl"#); + assert_eq!( + res.unwrap_err().to_string(), + "sql parser error: Expected ilike pattern, found: 42" + ); +} + +#[test] +fn test_select_wildcard_with_ilike_replace() { + let res = snowflake().parse_sql_statements(r#"SELECT * ILIKE '%id%' EXCLUDE col FROM tbl"#); + assert_eq!( + res.unwrap_err().to_string(), + "sql parser error: Expected end of statement, found: EXCLUDE" 
+ ); +} From 7b49c69b3a14ba3a2763dd4c9b33913e6e5649ca Mon Sep 17 00:00:00 2001 From: Kould Date: Sun, 21 Apr 2024 20:32:53 +0800 Subject: [PATCH 29/42] Support `Modify Column` for MySQL dialect (#1216) --- src/ast/ddl.rs | 24 +++++++++++ src/keywords.rs | 1 + src/parser/mod.rs | 17 ++++++++ tests/sqlparser_mysql.rs | 93 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 135 insertions(+) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index d86ebad9d..de514550b 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -134,6 +134,14 @@ pub enum AlterTableOperation { /// MySQL `ALTER TABLE` only [FIRST | AFTER column_name] column_position: Option, }, + // CHANGE [ COLUMN ] [ ] + ModifyColumn { + col_name: Ident, + data_type: DataType, + options: Vec, + /// MySQL `ALTER TABLE` only [FIRST | AFTER column_name] + column_position: Option, + }, /// `RENAME CONSTRAINT TO ` /// /// Note: this is a PostgreSQL-specific operation. @@ -292,6 +300,22 @@ impl fmt::Display for AlterTableOperation { Ok(()) } + AlterTableOperation::ModifyColumn { + col_name, + data_type, + options, + column_position, + } => { + write!(f, "MODIFY COLUMN {col_name} {data_type}")?; + if !options.is_empty() { + write!(f, " {}", display_separated(options, " "))?; + } + if let Some(position) = column_position { + write!(f, " {position}")?; + } + + Ok(()) + } AlterTableOperation::RenameConstraint { old_name, new_name } => { write!(f, "RENAME CONSTRAINT {old_name} TO {new_name}") } diff --git a/src/keywords.rs b/src/keywords.rs index 12a376b2a..fcc344bcd 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -438,6 +438,7 @@ define_keywords!( MOD, MODE, MODIFIES, + MODIFY, MODULE, MONTH, MSCK, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6779dfd0f..9910c889a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5757,6 +5757,23 @@ impl<'a> Parser<'a> { options, column_position, } + } else if self.parse_keyword(Keyword::MODIFY) { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let col_name = self.parse_identifier(false)?; + let data_type = self.parse_data_type()?; + let mut options = vec![]; + while let Some(option) = self.parse_optional_column_option()? 
{ + options.push(option); + } + + let column_position = self.parse_column_position()?; + + AlterTableOperation::ModifyColumn { + col_name, + data_type, + options, + column_position, + } } else if self.parse_keyword(Keyword::ALTER) { let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] let column_name = self.parse_identifier(false)?; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 5f64079a6..e53f434d5 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -2218,6 +2218,99 @@ fn parse_alter_table_change_column_with_column_position() { assert_eq!(expected_operation_after, operation); } +#[test] +fn parse_alter_table_modify_column() { + let expected_name = ObjectName(vec![Ident::new("orders")]); + let expected_operation = AlterTableOperation::ModifyColumn { + col_name: Ident::new("description"), + data_type: DataType::Text, + options: vec![ColumnOption::NotNull], + column_position: None, + }; + + let sql1 = "ALTER TABLE orders MODIFY COLUMN description TEXT NOT NULL"; + let operation = + alter_table_op_with_name(mysql().verified_stmt(sql1), &expected_name.to_string()); + assert_eq!(expected_operation, operation); + + let sql2 = "ALTER TABLE orders MODIFY description TEXT NOT NULL"; + let operation = alter_table_op_with_name( + mysql().one_statement_parses_to(sql2, sql1), + &expected_name.to_string(), + ); + assert_eq!(expected_operation, operation); + + let expected_operation = AlterTableOperation::ModifyColumn { + col_name: Ident::new("description"), + data_type: DataType::Text, + options: vec![ColumnOption::NotNull], + column_position: Some(MySQLColumnPosition::First), + }; + let sql3 = "ALTER TABLE orders MODIFY COLUMN description TEXT NOT NULL FIRST"; + let operation = + alter_table_op_with_name(mysql().verified_stmt(sql3), &expected_name.to_string()); + assert_eq!(expected_operation, operation); + + let expected_operation = AlterTableOperation::ModifyColumn { + col_name: Ident::new("description"), + data_type: DataType::Text, + options: vec![ColumnOption::NotNull], + column_position: Some(MySQLColumnPosition::After(Ident { + value: String::from("foo"), + quote_style: None, + })), + }; + let sql4 = "ALTER TABLE orders MODIFY COLUMN description TEXT NOT NULL AFTER foo"; + let operation = + alter_table_op_with_name(mysql().verified_stmt(sql4), &expected_name.to_string()); + assert_eq!(expected_operation, operation); +} + +#[test] +fn parse_alter_table_modify_column_with_column_position() { + let expected_name = ObjectName(vec![Ident::new("orders")]); + let expected_operation_first = AlterTableOperation::ModifyColumn { + col_name: Ident::new("description"), + data_type: DataType::Text, + options: vec![ColumnOption::NotNull], + column_position: Some(MySQLColumnPosition::First), + }; + + let sql1 = "ALTER TABLE orders MODIFY COLUMN description TEXT NOT NULL FIRST"; + let operation = + alter_table_op_with_name(mysql().verified_stmt(sql1), &expected_name.to_string()); + assert_eq!(expected_operation_first, operation); + + let sql2 = "ALTER TABLE orders MODIFY description TEXT NOT NULL FIRST"; + let operation = alter_table_op_with_name( + mysql().one_statement_parses_to(sql2, sql1), + &expected_name.to_string(), + ); + assert_eq!(expected_operation_first, operation); + + let expected_operation_after = AlterTableOperation::ModifyColumn { + col_name: Ident::new("description"), + data_type: DataType::Text, + options: vec![ColumnOption::NotNull], + column_position: Some(MySQLColumnPosition::After(Ident { + value: String::from("total_count"), + quote_style: 
None, + })), + }; + + let sql1 = "ALTER TABLE orders MODIFY COLUMN description TEXT NOT NULL AFTER total_count"; + let operation = + alter_table_op_with_name(mysql().verified_stmt(sql1), &expected_name.to_string()); + assert_eq!(expected_operation_after, operation); + + let sql2 = "ALTER TABLE orders MODIFY description TEXT NOT NULL AFTER total_count"; + let operation = alter_table_op_with_name( + mysql().one_statement_parses_to(sql2, sql1), + &expected_name.to_string(), + ); + assert_eq!(expected_operation_after, operation); +} + #[test] fn parse_substring_in_select() { let sql = "SELECT DISTINCT SUBSTRING(description, 0, 1) FROM test"; From d2c2b15f9e349e5aba679baed96af497c734d834 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Sun, 21 Apr 2024 15:07:56 +0200 Subject: [PATCH 30/42] Add support for quoted string backslash escaping (#1177) --- src/ast/mod.rs | 6 +- src/dialect/bigquery.rs | 5 + src/dialect/clickhouse.rs | 4 + src/dialect/mod.rs | 21 ++++ src/dialect/mysql.rs | 5 + src/dialect/snowflake.rs | 5 + src/parser/mod.rs | 4 +- src/tokenizer.rs | 130 ++++++++++++++++------- tests/sqlparser_bigquery.rs | 109 ------------------- tests/sqlparser_clickhouse.rs | 109 ------------------- tests/sqlparser_common.rs | 191 +++++++++++++++++++++++++++++++++- tests/sqlparser_hive.rs | 111 +------------------- tests/sqlparser_mssql.rs | 109 ------------------- tests/sqlparser_mysql.rs | 72 ------------- tests/sqlparser_postgres.rs | 109 ------------------- tests/sqlparser_redshift.rs | 109 ------------------- tests/sqlparser_snowflake.rs | 140 ++++--------------------- tests/sqlparser_sqlite.rs | 109 ------------------- 18 files changed, 352 insertions(+), 996 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 31924e051..b78a559a0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -512,21 +512,21 @@ pub enum Expr { negated: bool, expr: Box, pattern: Box, - escape_char: Option, + escape_char: Option, }, /// `ILIKE` (case-insensitive `LIKE`) ILike { negated: bool, expr: Box, pattern: Box, - escape_char: Option, + escape_char: Option, }, /// SIMILAR TO regex SimilarTo { negated: bool, expr: Box, pattern: Box, - escape_char: Option, + escape_char: Option, }, /// MySQL: RLIKE regex or REGEXP regex RLike { diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index bcd27c3b5..d36910dbc 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -29,4 +29,9 @@ impl Dialect for BigQueryDialect { fn is_identifier_part(&self, ch: char) -> bool { ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_' } + + // See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#escape_sequences + fn supports_string_literal_backslash_escape(&self) -> bool { + true + } } diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 50fbde99e..83cc4ae9a 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -25,4 +25,8 @@ impl Dialect for ClickHouseDialect { fn is_identifier_part(&self, ch: char) -> bool { self.is_identifier_start(ch) || ch.is_ascii_digit() } + + fn supports_string_literal_backslash_escape(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 2463121e7..e409c716e 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -120,6 +120,23 @@ pub trait Dialect: Debug + Any { fn is_identifier_start(&self, ch: char) -> bool; /// Determine if a character is a valid unquoted identifier character fn is_identifier_part(&self, ch: char) -> bool; + /// Determine if the 
dialect supports escaping characters via '\' in string literals. + /// + /// Some dialects like BigQuery and Snowflake support this while others like + /// Postgres do not. Such that the following is accepted by the former but + /// rejected by the latter. + /// ```sql + /// SELECT 'ab\'cd'; + /// ``` + /// + /// Conversely, such dialects reject the following statement which + /// otherwise would be valid in the other dialects. + /// ```sql + /// SELECT '\'; + /// ``` + fn supports_string_literal_backslash_escape(&self) -> bool { + false + } /// Does the dialect support `FILTER (WHERE expr)` for aggregate queries? fn supports_filter_during_aggregation(&self) -> bool { false @@ -306,6 +323,10 @@ mod tests { self.0.identifier_quote_style(identifier) } + fn supports_string_literal_backslash_escape(&self) -> bool { + self.0.supports_string_literal_backslash_escape() + } + fn is_proper_identifier_inside_quotes( &self, chars: std::iter::Peekable>, diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index d0dbe923c..f7711b2b0 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -48,6 +48,11 @@ impl Dialect for MySqlDialect { Some('`') } + // See https://dev.mysql.com/doc/refman/8.0/en/string-literals.html#character-escape-sequences + fn supports_string_literal_backslash_escape(&self) -> bool { + true + } + fn parse_infix( &self, parser: &mut crate::parser::Parser, diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 1d9d983e5..28b18b78c 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -46,6 +46,11 @@ impl Dialect for SnowflakeDialect { || ch == '_' } + // See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#escape_sequences + fn supports_string_literal_backslash_escape(&self) -> bool { + true + } + fn supports_within_after_array_aggregation(&self) -> bool { true } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9910c889a..9ad27b16a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2560,9 +2560,9 @@ impl<'a> Parser<'a> { } /// parse the ESCAPE CHAR portion of LIKE, ILIKE, and SIMILAR TO - pub fn parse_escape_char(&mut self) -> Result, ParserError> { + pub fn parse_escape_char(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::ESCAPE) { - Ok(Some(self.parse_literal_char()?)) + Ok(Some(self.parse_literal_string()?)) } else { Ok(None) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index b239d990e..b99eeba80 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -627,11 +627,11 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume match chars.peek() { Some('\'') => { - let s = self.tokenize_quoted_string(chars, '\'')?; + let s = self.tokenize_quoted_string(chars, '\'', false)?; Ok(Some(Token::SingleQuotedByteStringLiteral(s))) } Some('\"') => { - let s = self.tokenize_quoted_string(chars, '\"')?; + let s = self.tokenize_quoted_string(chars, '\"', false)?; Ok(Some(Token::DoubleQuotedByteStringLiteral(s))) } _ => { @@ -646,11 +646,11 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume match chars.peek() { Some('\'') => { - let s = self.tokenize_quoted_string(chars, '\'')?; + let s = self.tokenize_quoted_string(chars, '\'', false)?; Ok(Some(Token::RawStringLiteral(s))) } Some('\"') => { - let s = self.tokenize_quoted_string(chars, '\"')?; + let s = self.tokenize_quoted_string(chars, '\"', false)?; Ok(Some(Token::RawStringLiteral(s))) } _ => { @@ -666,7 +666,7 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('\'') => { // N'...' 
- a - let s = self.tokenize_quoted_string(chars, '\'')?; + let s = self.tokenize_quoted_string(chars, '\'', true)?; Ok(Some(Token::NationalStringLiteral(s))) } _ => { @@ -700,7 +700,7 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('\'') => { // X'...' - a - let s = self.tokenize_quoted_string(chars, '\'')?; + let s = self.tokenize_quoted_string(chars, '\'', true)?; Ok(Some(Token::HexStringLiteral(s))) } _ => { @@ -712,7 +712,11 @@ impl<'a> Tokenizer<'a> { } // single quoted string '\'' => { - let s = self.tokenize_quoted_string(chars, '\'')?; + let s = self.tokenize_quoted_string( + chars, + '\'', + self.dialect.supports_string_literal_backslash_escape(), + )?; Ok(Some(Token::SingleQuotedString(s))) } @@ -720,7 +724,11 @@ impl<'a> Tokenizer<'a> { '\"' if !self.dialect.is_delimited_identifier_start(ch) && !self.dialect.is_identifier_start(ch) => { - let s = self.tokenize_quoted_string(chars, '"')?; + let s = self.tokenize_quoted_string( + chars, + '"', + self.dialect.supports_string_literal_backslash_escape(), + )?; Ok(Some(Token::DoubleQuotedString(s))) } @@ -1222,6 +1230,7 @@ impl<'a> Tokenizer<'a> { &self, chars: &mut State, quote_style: char, + allow_escape: bool, ) -> Result { let mut s = String::new(); let error_loc = chars.location(); @@ -1243,35 +1252,31 @@ impl<'a> Tokenizer<'a> { return Ok(s); } } - '\\' => { - // consume + '\\' if allow_escape => { + // consume backslash chars.next(); - // slash escaping is specific to MySQL dialect. - if dialect_of!(self is MySqlDialect) { - if let Some(next) = chars.peek() { - if !self.unescape { - // In no-escape mode, the given query has to be saved completely including backslashes. - s.push(ch); - s.push(*next); - chars.next(); // consume next - } else { - // See https://dev.mysql.com/doc/refman/8.0/en/string-literals.html#character-escape-sequences - let n = match next { - '\'' | '\"' | '\\' | '%' | '_' => *next, - '0' => '\0', - 'b' => '\u{8}', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - 'Z' => '\u{1a}', - _ => *next, - }; - s.push(n); - chars.next(); // consume next - } + + if let Some(next) = chars.peek() { + if !self.unescape { + // In no-escape mode, the given query has to be saved completely including backslashes. 
+ s.push(ch); + s.push(*next); + chars.next(); // consume next + } else { + let n = match next { + '0' => '\0', + 'a' => '\u{7}', + 'b' => '\u{8}', + 'f' => '\u{c}', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'Z' => '\u{1a}', + _ => *next, + }; + s.push(n); + chars.next(); // consume next } - } else { - s.push(ch); } } _ => { @@ -1517,7 +1522,7 @@ impl<'a: 'b, 'b> Unescape<'a, 'b> { #[cfg(test)] mod tests { use super::*; - use crate::dialect::{ClickHouseDialect, MsSqlDialect}; + use crate::dialect::{BigQueryDialect, ClickHouseDialect, MsSqlDialect}; #[test] fn tokenizer_error_impl() { @@ -2386,4 +2391,57 @@ mod tests { check_unescape(r"Hello\0", None); check_unescape(r"Hello\xCADRust", None); } + + #[test] + fn tokenize_quoted_string_escape() { + for (sql, expected, expected_unescaped) in [ + (r#"'%a\'%b'"#, r#"%a\'%b"#, r#"%a'%b"#), + (r#"'a\'\'b\'c\'d'"#, r#"a\'\'b\'c\'d"#, r#"a''b'c'd"#), + (r#"'\\'"#, r#"\\"#, r#"\"#), + ( + r#"'\0\a\b\f\n\r\t\Z'"#, + r#"\0\a\b\f\n\r\t\Z"#, + "\0\u{7}\u{8}\u{c}\n\r\t\u{1a}", + ), + (r#"'\"'"#, r#"\""#, "\""), + (r#"'\\a\\b\'c'"#, r#"\\a\\b\'c"#, r#"\a\b'c"#), + (r#"'\'abcd'"#, r#"\'abcd"#, r#"'abcd"#), + (r#"'''a''b'"#, r#"''a''b"#, r#"'a'b"#), + ] { + let dialect = BigQueryDialect {}; + + let tokens = Tokenizer::new(&dialect, sql) + .with_unescape(false) + .tokenize() + .unwrap(); + let expected = vec![Token::SingleQuotedString(expected.to_string())]; + compare(expected, tokens); + + let tokens = Tokenizer::new(&dialect, sql) + .with_unescape(true) + .tokenize() + .unwrap(); + let expected = vec![Token::SingleQuotedString(expected_unescaped.to_string())]; + compare(expected, tokens); + } + + for sql in [r#"'\'"#, r#"'ab\'"#] { + let dialect = BigQueryDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, sql); + assert_eq!( + "Unterminated string literal", + tokenizer.tokenize().unwrap_err().message.as_str(), + ); + } + + // Non-escape dialect + for (sql, expected) in [(r#"'\'"#, r#"\"#), (r#"'ab\'"#, r#"ab\"#)] { + let dialect = GenericDialect {}; + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); + + let expected = vec![Token::SingleQuotedString(expected.to_string())]; + + compare(expected, tokens); + } + } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 43e6a84b7..a01c09d96 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1128,115 +1128,6 @@ fn parse_cast_bytes_to_string_format() { bigquery_and_generic().verified_only_select(sql); } -#[test] -fn parse_like() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a'", - if negated { "NOT " } else { "" } - ); - let select = bigquery().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = bigquery().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that LIKE and NOT LIKE have the same precedence. - // This was previously mishandled (#81). 
- let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = bigquery().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - -#[test] -fn parse_similar_to() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a'", - if negated { "NOT " } else { "" } - ); - let select = bigquery().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = bigquery().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that SIMILAR TO and NOT SIMILAR TO have the same precedence. - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = bigquery().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - #[test] fn parse_array_agg_func() { for sql in [ diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 22396d064..4f1e67a17 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -230,115 +230,6 @@ fn parse_delimited_identifiers() { //TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#); } -#[test] -fn parse_like() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a'", - if negated { "NOT " } else { "" } - ); - let select = clickhouse().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = clickhouse().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that LIKE and NOT LIKE have the same precedence. - // This was previously mishandled (#81). 
- let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = clickhouse().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - -#[test] -fn parse_similar_to() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a'", - if negated { "NOT " } else { "" } - ); - let select = clickhouse().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = clickhouse().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that SIMILAR TO and NOT SIMILAR TO have the same precedence. - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = clickhouse().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - #[test] fn parse_create_table() { clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY ("x")"#); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index bbc0f0b2f..dd447ebb8 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1599,7 +1599,7 @@ fn parse_ilike() { expr: Box::new(Expr::Identifier(Ident::new("name"))), negated, pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('^'), + escape_char: Some('^'.to_string()), }, select.selection.unwrap() ); @@ -1625,6 +1625,115 @@ fn parse_ilike() { chk(true); } +#[test] +fn parse_like() { + fn chk(negated: bool) { + let sql = &format!( + "SELECT * FROM customers WHERE name {}LIKE '%a'", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + Expr::Like { + expr: Box::new(Expr::Identifier(Ident::new("name"))), + negated, + pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), + escape_char: None, + }, + select.selection.unwrap() + ); + + // Test with escape char + let sql = &format!( + "SELECT * FROM customers WHERE name {}LIKE '%a' ESCAPE '^'", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + Expr::Like { + expr: Box::new(Expr::Identifier(Ident::new("name"))), + negated, + pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), + escape_char: Some('^'.to_string()), + }, + select.selection.unwrap() + ); + + // This statement tests that 
LIKE and NOT LIKE have the same precedence. + // This was previously mishandled (#81). + let sql = &format!( + "SELECT * FROM customers WHERE name {}LIKE '%a' IS NULL", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + Expr::IsNull(Box::new(Expr::Like { + expr: Box::new(Expr::Identifier(Ident::new("name"))), + negated, + pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), + escape_char: None, + })), + select.selection.unwrap() + ); + } + chk(false); + chk(true); +} + +#[test] +fn parse_similar_to() { + fn chk(negated: bool) { + let sql = &format!( + "SELECT * FROM customers WHERE name {}SIMILAR TO '%a'", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + Expr::SimilarTo { + expr: Box::new(Expr::Identifier(Ident::new("name"))), + negated, + pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), + escape_char: None, + }, + select.selection.unwrap() + ); + + // Test with escape char + let sql = &format!( + "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '^'", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + Expr::SimilarTo { + expr: Box::new(Expr::Identifier(Ident::new("name"))), + negated, + pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), + escape_char: Some('^'.to_string()), + }, + select.selection.unwrap() + ); + + // This statement tests that SIMILAR TO and NOT SIMILAR TO have the same precedence. + let sql = &format!( + "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '^' IS NULL", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + Expr::IsNull(Box::new(Expr::SimilarTo { + expr: Box::new(Expr::Identifier(Ident::new("name"))), + negated, + pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), + escape_char: Some('^'.to_string()), + })), + select.selection.unwrap() + ); + } + chk(false); + chk(true); +} + #[test] fn parse_in_list() { fn chk(negated: bool) { @@ -8166,6 +8275,86 @@ fn parse_with_recursion_limit() { assert!(res.is_ok(), "{res:?}"); } +#[test] +fn parse_escaped_string_with_unescape() { + fn assert_mysql_query_value(sql: &str, quoted: &str) { + let stmt = TestedDialects { + dialects: vec![ + Box::new(MySqlDialect {}), + Box::new(BigQueryDialect {}), + Box::new(SnowflakeDialect {}), + ], + options: None, + } + .one_statement_parses_to(sql, ""); + + match stmt { + Statement::Query(query) => match *query.body { + SetExpr::Select(value) => { + let expr = expr_from_projection(only(&value.projection)); + assert_eq!( + *expr, + Expr::Value(Value::SingleQuotedString(quoted.to_string())) + ); + } + _ => unreachable!(), + }, + _ => unreachable!(), + }; + } + let sql = r"SELECT 'I\'m fine'"; + assert_mysql_query_value(sql, "I'm fine"); + + let sql = r#"SELECT 'I''m fine'"#; + assert_mysql_query_value(sql, "I'm fine"); + + let sql = r#"SELECT 'I\"m fine'"#; + assert_mysql_query_value(sql, "I\"m fine"); + + let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \h \ '"; + assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} \u{7} h "); +} + +#[test] +fn parse_escaped_string_without_unescape() { + fn assert_mysql_query_value(sql: &str, quoted: &str) { + let stmt = TestedDialects { + dialects: vec![ + Box::new(MySqlDialect {}), + Box::new(BigQueryDialect {}), + Box::new(SnowflakeDialect {}), + ], + options: Some(ParserOptions::new().with_unescape(false)), + 
} + .one_statement_parses_to(sql, ""); + + match stmt { + Statement::Query(query) => match *query.body { + SetExpr::Select(value) => { + let expr = expr_from_projection(only(&value.projection)); + assert_eq!( + *expr, + Expr::Value(Value::SingleQuotedString(quoted.to_string())) + ); + } + _ => unreachable!(), + }, + _ => unreachable!(), + }; + } + let sql = r"SELECT 'I\'m fine'"; + assert_mysql_query_value(sql, r"I\'m fine"); + + let sql = r#"SELECT 'I''m fine'"#; + assert_mysql_query_value(sql, r#"I''m fine"#); + + let sql = r#"SELECT 'I\"m fine'"#; + assert_mysql_query_value(sql, r#"I\"m fine"#); + + let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"; + assert_mysql_query_value(sql, r"Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ "); +} + #[test] fn parse_pivot_table() { let sql = concat!( diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 788c937a6..76fe961fe 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -17,7 +17,7 @@ use sqlparser::ast::{ CreateFunctionBody, CreateFunctionUsing, Expr, Function, FunctionDefinition, Ident, ObjectName, - SelectItem, Statement, TableFactor, UnaryOperator, Value, + SelectItem, Statement, TableFactor, UnaryOperator, }; use sqlparser::dialect::{GenericDialect, HiveDialect, MsSqlDialect}; use sqlparser::parser::{ParserError, ParserOptions}; @@ -420,115 +420,6 @@ fn parse_delimited_identifiers() { //TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#); } -#[test] -fn parse_like() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a'", - if negated { "NOT " } else { "" } - ); - let select = hive().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = hive().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that LIKE and NOT LIKE have the same precedence. - // This was previously mishandled (#81). 
- let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = hive().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - -#[test] -fn parse_similar_to() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a'", - if negated { "NOT " } else { "" } - ); - let select = hive().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = hive().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that SIMILAR TO and NOT SIMILAR TO have the same precedence. - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = hive().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - fn hive() -> TestedDialects { TestedDialects { dialects: vec![Box::new(HiveDialect {})], diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index ff3e75569..ed4d69e6b 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -390,61 +390,6 @@ fn parse_table_name_in_square_brackets() { ); } -#[test] -fn parse_like() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a'", - if negated { "NOT " } else { "" } - ); - let select = ms_and_generic().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = ms_and_generic().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that LIKE and NOT LIKE have the same precedence. - // This was previously mishandled (#81). 
- let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = ms_and_generic().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - #[test] fn parse_for_clause() { ms_and_generic().verified_stmt("SELECT a FROM t FOR JSON PATH"); @@ -495,60 +440,6 @@ fn parse_convert() { ms().verified_expr("CONVERT(DECIMAL(10,5), 12.55)"); } -#[test] -fn parse_similar_to() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a'", - if negated { "NOT " } else { "" } - ); - let select = ms_and_generic().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = ms_and_generic().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that SIMILAR TO and NOT SIMILAR TO have the same precedence. - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = ms_and_generic().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - #[test] fn parse_substring_in_select() { let sql = "SELECT DISTINCT SUBSTRING(description, 0, 1) FROM test"; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index e53f434d5..b2c164e3d 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1061,78 +1061,6 @@ fn parse_unterminated_escape() { assert!(result.is_err()); } -#[test] -fn parse_escaped_string_with_escape() { - fn assert_mysql_query_value(sql: &str, quoted: &str) { - let stmt = TestedDialects { - dialects: vec![Box::new(MySqlDialect {})], - options: None, - } - .one_statement_parses_to(sql, ""); - - match stmt { - Statement::Query(query) => match *query.body { - SetExpr::Select(value) => { - let expr = expr_from_projection(only(&value.projection)); - assert_eq!( - *expr, - Expr::Value(Value::SingleQuotedString(quoted.to_string())) - ); - } - _ => unreachable!(), - }, - _ => unreachable!(), - }; - } - let sql = r"SELECT 'I\'m fine'"; - assert_mysql_query_value(sql, "I'm fine"); - - let sql = r#"SELECT 'I''m fine'"#; - assert_mysql_query_value(sql, "I'm fine"); - - let sql = r#"SELECT 'I\"m fine'"#; - assert_mysql_query_value(sql, "I\"m fine"); - - let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"; - assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} a "); -} - -#[test] -fn 
parse_escaped_string_with_no_escape() { - fn assert_mysql_query_value(sql: &str, quoted: &str) { - let stmt = TestedDialects { - dialects: vec![Box::new(MySqlDialect {})], - options: Some(ParserOptions::new().with_unescape(false)), - } - .one_statement_parses_to(sql, ""); - - match stmt { - Statement::Query(query) => match *query.body { - SetExpr::Select(value) => { - let expr = expr_from_projection(only(&value.projection)); - assert_eq!( - *expr, - Expr::Value(Value::SingleQuotedString(quoted.to_string())) - ); - } - _ => unreachable!(), - }, - _ => unreachable!(), - }; - } - let sql = r"SELECT 'I\'m fine'"; - assert_mysql_query_value(sql, r"I\'m fine"); - - let sql = r#"SELECT 'I''m fine'"#; - assert_mysql_query_value(sql, r#"I''m fine"#); - - let sql = r#"SELECT 'I\"m fine'"#; - assert_mysql_query_value(sql, r#"I\"m fine"#); - - let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"; - assert_mysql_query_value(sql, r"Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ "); -} - #[test] fn check_roundtrip_of_escaped_string() { let options = Some(ParserOptions::new().with_unescape(false)); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 38e32780d..6bb4bc69b 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3171,115 +3171,6 @@ fn parse_update_in_with_subquery() { pg_and_generic().verified_stmt(r#"WITH "result" AS (UPDATE "Hero" SET "name" = 'Captain America', "number_of_movies" = "number_of_movies" + 1 WHERE "secret_identity" = 'Sam Wilson' RETURNING "id", "name", "secret_identity", "number_of_movies") SELECT * FROM "result""#); } -#[test] -fn parse_like() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a'", - if negated { "NOT " } else { "" } - ); - let select = pg().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = pg().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that LIKE and NOT LIKE have the same precedence. - // This was previously mishandled (#81). 
- let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = pg().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - -#[test] -fn parse_similar_to() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a'", - if negated { "NOT " } else { "" } - ); - let select = pg().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = pg().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that SIMILAR TO and NOT SIMILAR TO have the same precedence. - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = pg().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - #[test] fn parse_create_function() { let sql = "CREATE FUNCTION add(INTEGER, INTEGER) RETURNS INTEGER LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE AS 'select $1 + $2;'"; diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 6fa647d38..3de229676 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -159,115 +159,6 @@ fn parse_delimited_identifiers() { //TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#); } -#[test] -fn parse_like() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a'", - if negated { "NOT " } else { "" } - ); - let select = redshift().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = redshift().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that LIKE and NOT LIKE have the same precedence. - // This was previously mishandled (#81). 
- let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = redshift().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - -#[test] -fn parse_similar_to() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a'", - if negated { "NOT " } else { "" } - ); - let select = redshift().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = redshift().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that SIMILAR TO and NOT SIMILAR TO have the same precedence. - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = redshift().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - fn redshift() -> TestedDialects { TestedDialects { dialects: vec![Box::new(RedshiftSqlDialect {})], diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 56060a0d7..469e6739f 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -19,7 +19,7 @@ use sqlparser::ast::helpers::stmt_data_loading::{ }; use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, SnowflakeDialect}; -use sqlparser::parser::ParserError; +use sqlparser::parser::{ParserError, ParserOptions}; use sqlparser::tokenizer::*; use test_utils::*; @@ -309,115 +309,6 @@ fn parse_delimited_identifiers() { //TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#); } -#[test] -fn parse_like() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a'", - if negated { "NOT " } else { "" } - ); - let select = snowflake().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = snowflake().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - 
select.selection.unwrap() - ); - - // This statement tests that LIKE and NOT LIKE have the same precedence. - // This was previously mishandled (#81). - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = snowflake().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - -#[test] -fn parse_similar_to() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a'", - if negated { "NOT " } else { "" } - ); - let select = snowflake().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = snowflake().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that SIMILAR TO and NOT SIMILAR TO have the same precedence. - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = snowflake().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - #[test] fn test_array_agg_func() { for sql in [ @@ -444,6 +335,13 @@ fn snowflake() -> TestedDialects { } } +fn snowflake_without_unescape() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(SnowflakeDialect {})], + options: Some(ParserOptions::new().with_unescape(false)), + } +} + fn snowflake_and_generic() -> TestedDialects { TestedDialects { dialects: vec![Box::new(SnowflakeDialect {}), Box::new(GenericDialect {})], @@ -985,10 +883,10 @@ fn test_create_stage_with_file_format() { let sql = concat!( "CREATE OR REPLACE STAGE my_ext_stage ", "URL='s3://load/files/' ", - "FILE_FORMAT=(COMPRESSION=AUTO BINARY_FORMAT=HEX ESCAPE='\\')" + r#"FILE_FORMAT=(COMPRESSION=AUTO BINARY_FORMAT=HEX ESCAPE='\\')"# ); - match snowflake().verified_stmt(sql) { + match snowflake_without_unescape().verified_stmt(sql) { Statement::CreateStage { file_format, .. 
} => { assert!(file_format.options.contains(&DataLoadingOption { option_name: "COMPRESSION".to_string(), @@ -1003,12 +901,15 @@ fn test_create_stage_with_file_format() { assert!(file_format.options.contains(&DataLoadingOption { option_name: "ESCAPE".to_string(), option_type: DataLoadingOptionType::STRING, - value: "\\".to_string() + value: r#"\\"#.to_string() })); } _ => unreachable!(), }; - assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); + assert_eq!( + snowflake_without_unescape().verified_stmt(sql).to_string(), + sql + ); } #[test] @@ -1243,10 +1144,10 @@ fn test_copy_into_file_format() { "FROM 'gcs://mybucket/./../a.csv' ", "FILES = ('file1.json', 'file2.json') ", "PATTERN = '.*employees0[1-5].csv.gz' ", - "FILE_FORMAT=(COMPRESSION=AUTO BINARY_FORMAT=HEX ESCAPE='\\')" + r#"FILE_FORMAT=(COMPRESSION=AUTO BINARY_FORMAT=HEX ESCAPE='\\')"# ); - match snowflake().verified_stmt(sql) { + match snowflake_without_unescape().verified_stmt(sql) { Statement::CopyIntoSnowflake { file_format, .. } => { assert!(file_format.options.contains(&DataLoadingOption { option_name: "COMPRESSION".to_string(), @@ -1261,12 +1162,15 @@ fn test_copy_into_file_format() { assert!(file_format.options.contains(&DataLoadingOption { option_name: "ESCAPE".to_string(), option_type: DataLoadingOptionType::STRING, - value: "\\".to_string() + value: r#"\\"#.to_string() })); } _ => unreachable!(), } - assert_eq!(snowflake().verified_stmt(sql).to_string(), sql); + assert_eq!( + snowflake_without_unescape().verified_stmt(sql).to_string(), + sql + ); } #[test] diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index c9d5d98cd..b90e45827 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -290,115 +290,6 @@ fn test_placeholder() { ); } -#[test] -fn parse_like() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a'", - if negated { "NOT " } else { "" } - ); - let select = sqlite().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = sqlite().verified_only_select(sql); - assert_eq!( - Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that LIKE and NOT LIKE have the same precedence. - // This was previously mishandled (#81). 
- let sql = &format!( - "SELECT * FROM customers WHERE name {}LIKE '%a' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = sqlite().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::Like { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - -#[test] -fn parse_similar_to() { - fn chk(negated: bool) { - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a'", - if negated { "NOT " } else { "" } - ); - let select = sqlite().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: None, - }, - select.selection.unwrap() - ); - - // Test with escape char - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\'", - if negated { "NOT " } else { "" } - ); - let select = sqlite().verified_only_select(sql); - assert_eq!( - Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - }, - select.selection.unwrap() - ); - - // This statement tests that SIMILAR TO and NOT SIMILAR TO have the same precedence. - let sql = &format!( - "SELECT * FROM customers WHERE name {}SIMILAR TO '%a' ESCAPE '\\' IS NULL", - if negated { "NOT " } else { "" } - ); - let select = sqlite().verified_only_select(sql); - assert_eq!( - Expr::IsNull(Box::new(Expr::SimilarTo { - expr: Box::new(Expr::Identifier(Ident::new("name"))), - negated, - pattern: Box::new(Expr::Value(Value::SingleQuotedString("%a".to_string()))), - escape_char: Some('\\'), - })), - select.selection.unwrap() - ); - } - chk(false); - chk(true); -} - #[test] fn parse_create_table_with_strict() { let sql = "CREATE TABLE Fruits (id TEXT NOT NULL PRIMARY KEY) STRICT"; From bf89b7d808f331aaac5cdb281ae66995ffd06fdc Mon Sep 17 00:00:00 2001 From: tison Date: Sun, 21 Apr 2024 21:13:18 +0800 Subject: [PATCH 31/42] Encapsulate `Insert` and `Delete` into specific structs (#1224) Signed-off-by: tison --- src/ast/dml.rs | 84 +++++++++++++++++++++++++++ src/ast/mod.rs | 110 +++++++++++------------------------- src/parser/mod.rs | 10 ++-- tests/sqlparser_bigquery.rs | 4 +- tests/sqlparser_common.rs | 42 +++++++------- tests/sqlparser_mysql.rs | 44 +++++++-------- tests/sqlparser_postgres.rs | 40 ++++++------- 7 files changed, 187 insertions(+), 147 deletions(-) create mode 100644 src/ast/dml.rs diff --git a/src/ast/dml.rs b/src/ast/dml.rs new file mode 100644 index 000000000..badc58a7d --- /dev/null +++ b/src/ast/dml.rs @@ -0,0 +1,84 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#[cfg(not(feature = "std"))] +use alloc::{boxed::Box, vec::Vec}; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +#[cfg(feature = "visitor")] +use sqlparser_derive::{Visit, VisitMut}; + +use super::{ + Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert, OrderByExpr, + Query, SelectItem, SqliteOnConflict, TableWithJoins, +}; + +/// INSERT statement. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Insert { + /// Only for Sqlite + pub or: Option, + /// Only for mysql + pub ignore: bool, + /// INTO - optional keyword + pub into: bool, + /// TABLE + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub table_name: ObjectName, + /// table_name as foo (for PostgreSQL) + pub table_alias: Option, + /// COLUMNS + pub columns: Vec, + /// Overwrite (Hive) + pub overwrite: bool, + /// A SQL query that specifies what to insert + pub source: Option>, + /// partitioned insert (Hive) + pub partitioned: Option>, + /// Columns defined after PARTITION + pub after_columns: Vec, + /// whether the insert has the table keyword (Hive) + pub table: bool, + pub on: Option, + /// RETURNING + pub returning: Option>, + /// Only for mysql + pub replace_into: bool, + /// Only for mysql + pub priority: Option, + /// Only for mysql + pub insert_alias: Option, +} + +/// DELETE statement. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Delete { + /// Multi tables delete are supported in mysql + pub tables: Vec, + /// FROM + pub from: FromTable, + /// USING (Snowflake, Postgres, MySQL) + pub using: Option>, + /// WHERE + pub selection: Option, + /// RETURNING + pub returning: Option>, + /// ORDER BY (MySQL) + pub order_by: Vec, + /// LIMIT (MySQL) + pub limit: Option, +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b78a559a0..f02461e0e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -37,6 +37,7 @@ pub use self::ddl::{ ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewColumnDef, }; +pub use self::dml::{Delete, Insert}; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ Cte, CteAsMaterialized, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, ForClause, @@ -60,6 +61,7 @@ pub use visitor::*; mod data_type; mod dcl; mod ddl; +mod dml; pub mod helpers; mod operator; mod query; @@ -1800,40 +1802,7 @@ pub enum Statement { /// ```sql /// INSERT /// ``` - Insert { - /// Only for Sqlite - or: Option, - /// Only for mysql - ignore: bool, - /// INTO - optional keyword - into: bool, - /// TABLE - #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - table_name: ObjectName, - /// table_name as foo (for PostgreSQL) - table_alias: Option, - /// COLUMNS - columns: Vec, - /// Overwrite (Hive) - overwrite: bool, - /// A SQL query that specifies what to insert - source: Option>, - /// partitioned insert (Hive) - partitioned: Option>, - /// Columns defined after PARTITION - after_columns: Vec, - /// whether the insert has the table keyword (Hive) - table: bool, - on: Option, - /// RETURNING - returning: Option>, - /// Only for mysql - replace_into: bool, - /// Only for mysql - priority: Option, - /// Only for mysql - insert_alias: Option, - }, + Insert(Insert), /// ```sql 
/// INSTALL /// ``` @@ -1923,22 +1892,7 @@ pub enum Statement { /// ```sql /// DELETE /// ``` - Delete { - /// Multi tables delete are supported in mysql - tables: Vec, - /// FROM - from: FromTable, - /// USING (Snowflake, Postgres, MySQL) - using: Option>, - /// WHERE - selection: Option, - /// RETURNING - returning: Option>, - /// ORDER BY (MySQL) - order_by: Vec, - /// LIMIT (MySQL) - limit: Option, - }, + Delete(Delete), /// ```sql /// CREATE VIEW /// ``` @@ -2912,24 +2866,25 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::Insert { - or, - ignore, - into, - table_name, - table_alias, - overwrite, - partitioned, - columns, - after_columns, - source, - table, - on, - returning, - replace_into, - priority, - insert_alias, - } => { + Statement::Insert(insert) => { + let Insert { + or, + ignore, + into, + table_name, + table_alias, + overwrite, + partitioned, + columns, + after_columns, + source, + table, + on, + returning, + replace_into, + priority, + insert_alias, + } = insert; let table_name = if let Some(alias) = table_alias { format!("{table_name} AS {alias}") } else { @@ -3074,15 +3029,16 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::Delete { - tables, - from, - using, - selection, - returning, - order_by, - limit, - } => { + Statement::Delete(delete) => { + let Delete { + tables, + from, + using, + selection, + returning, + order_by, + limit, + } = delete; write!(f, "DELETE ")?; if !tables.is_empty() { write!(f, "{} ", display_comma_separated(tables))?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9ad27b16a..c19019074 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7082,7 +7082,7 @@ impl<'a> Parser<'a> { None }; - Ok(Statement::Delete { + Ok(Statement::Delete(Delete { tables, from: if with_from_keyword { FromTable::WithFromKeyword(from) @@ -7094,7 +7094,7 @@ impl<'a> Parser<'a> { returning, order_by, limit, - }) + })) } // KILL [CONNECTION | QUERY | MUTATION] processlist_id @@ -8658,7 +8658,7 @@ impl<'a> Parser<'a> { } let insert = &mut self.parse_insert()?; - if let Statement::Insert { replace_into, .. } = insert { + if let Statement::Insert(Insert { replace_into, .. }) = insert { *replace_into = true; } @@ -8826,7 +8826,7 @@ impl<'a> Parser<'a> { None }; - Ok(Statement::Insert { + Ok(Statement::Insert(Insert { or, table_name, table_alias, @@ -8843,7 +8843,7 @@ impl<'a> Parser<'a> { replace_into, priority, insert_alias, - }) + })) } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index a01c09d96..170af820d 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -90,10 +90,10 @@ fn parse_raw_literal() { fn parse_delete_statement() { let sql = "DELETE \"table\" WHERE 1"; match bigquery_and_generic().verified_stmt(sql) { - Statement::Delete { + Statement::Delete(Delete { from: FromTable::WithoutKeyword(from), .. - } => { + }) => { assert_eq!( TableFactor::Table { name: ObjectName(vec![Ident::with_quote('"', "table")]), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index dd447ebb8..82cb600c5 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -84,12 +84,12 @@ fn parse_insert_values() { expected_rows: &[Vec], ) { match verified_stmt(sql) { - Statement::Insert { + Statement::Insert(Insert { table_name, columns, source: Some(source), .. 
- } => { + }) => { assert_eq!(table_name.to_string(), expected_table_name); assert_eq!(columns.len(), expected_columns.len()); for (index, column) in columns.iter().enumerate() { @@ -125,7 +125,7 @@ fn parse_insert_default_values() { let insert_with_default_values = verified_stmt("INSERT INTO test_table DEFAULT VALUES"); match insert_with_default_values { - Statement::Insert { + Statement::Insert(Insert { after_columns, columns, on, @@ -134,7 +134,7 @@ fn parse_insert_default_values() { source, table_name, .. - } => { + }) => { assert_eq!(columns, vec![]); assert_eq!(after_columns, vec![]); assert_eq!(on, None); @@ -150,7 +150,7 @@ fn parse_insert_default_values() { verified_stmt("INSERT INTO test_table DEFAULT VALUES RETURNING test_column"); match insert_with_default_values_and_returning { - Statement::Insert { + Statement::Insert(Insert { after_columns, columns, on, @@ -159,7 +159,7 @@ fn parse_insert_default_values() { source, table_name, .. - } => { + }) => { assert_eq!(after_columns, vec![]); assert_eq!(columns, vec![]); assert_eq!(on, None); @@ -175,7 +175,7 @@ fn parse_insert_default_values() { verified_stmt("INSERT INTO test_table DEFAULT VALUES ON CONFLICT DO NOTHING"); match insert_with_default_values_and_on_conflict { - Statement::Insert { + Statement::Insert(Insert { after_columns, columns, on, @@ -184,7 +184,7 @@ fn parse_insert_default_values() { source, table_name, .. - } => { + }) => { assert_eq!(after_columns, vec![]); assert_eq!(columns, vec![]); assert!(on.is_some()); @@ -230,11 +230,11 @@ fn parse_insert_select_returning() { verified_stmt("INSERT INTO t SELECT 1 RETURNING 2"); let stmt = verified_stmt("INSERT INTO t SELECT x RETURNING x AS y"); match stmt { - Statement::Insert { + Statement::Insert(Insert { returning: Some(ret), source: Some(_), .. - } => assert_eq!(ret.len(), 1), + }) => assert_eq!(ret.len(), 1), _ => unreachable!(), } } @@ -255,7 +255,7 @@ fn parse_insert_sqlite() { .pop() .unwrap() { - Statement::Insert { or, .. } => assert_eq!(or, expected_action), + Statement::Insert(Insert { or, .. }) => assert_eq!(or, expected_action), _ => panic!("{}", sql), }; @@ -545,10 +545,10 @@ fn parse_no_table_name() { fn parse_delete_statement() { let sql = "DELETE FROM \"table\""; match verified_stmt(sql) { - Statement::Delete { + Statement::Delete(Delete { from: FromTable::WithFromKeyword(from), .. - } => { + }) => { assert_eq!( TableFactor::Table { name: ObjectName(vec![Ident::with_quote('"', "table")]), @@ -582,11 +582,11 @@ fn parse_delete_statement_for_multi_tables() { let sql = "DELETE schema1.table1, schema2.table2 FROM schema1.table1 JOIN schema2.table2 ON schema2.table2.col1 = schema1.table1.col1 WHERE schema2.table2.col2 = 1"; let dialects = all_dialects_except(|d| d.is::() || d.is::()); match dialects.verified_stmt(sql) { - Statement::Delete { + Statement::Delete(Delete { tables, from: FromTable::WithFromKeyword(from), .. - } => { + }) => { assert_eq!( ObjectName(vec![Ident::new("schema1"), Ident::new("table1")]), tables[0] @@ -626,11 +626,11 @@ fn parse_delete_statement_for_multi_tables() { fn parse_delete_statement_for_multi_tables_with_using() { let sql = "DELETE FROM schema1.table1, schema2.table2 USING schema1.table1 JOIN schema2.table2 ON schema2.table2.pk = schema1.table1.col1 WHERE schema2.table2.col2 = 1"; match verified_stmt(sql) { - Statement::Delete { + Statement::Delete(Delete { from: FromTable::WithFromKeyword(from), using: Some(using), .. 
- } => { + }) => { assert_eq!( TableFactor::Table { name: ObjectName(vec![Ident::new("schema1"), Ident::new("table1")]), @@ -686,14 +686,14 @@ fn parse_where_delete_statement() { let sql = "DELETE FROM foo WHERE name = 5"; match verified_stmt(sql) { - Statement::Delete { + Statement::Delete(Delete { tables: _, from: FromTable::WithFromKeyword(from), using, selection, returning, .. - } => { + }) => { assert_eq!( TableFactor::Table { name: ObjectName(vec![Ident::new("foo")]), @@ -727,14 +727,14 @@ fn parse_where_delete_with_alias_statement() { let sql = "DELETE FROM basket AS a USING basket AS b WHERE a.id < b.id"; match verified_stmt(sql) { - Statement::Delete { + Statement::Delete(Delete { tables: _, from: FromTable::WithFromKeyword(from), using, selection, returning, .. - } => { + }) => { assert_eq!( TableFactor::Table { name: ObjectName(vec![Ident::new("basket")]), diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index b2c164e3d..0fcf61d0b 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1211,13 +1211,13 @@ fn parse_simple_insert() { let sql = r"INSERT INTO tasks (title, priority) VALUES ('Test Some Inserts', 1), ('Test Entry 2', 2), ('Test Entry 3', 3)"; match mysql().verified_stmt(sql) { - Statement::Insert { + Statement::Insert(Insert { table_name, columns, source, on, .. - } => { + }) => { assert_eq!(ObjectName(vec![Ident::new("tasks")]), table_name); assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); assert!(on.is_none()); @@ -1263,14 +1263,14 @@ fn parse_ignore_insert() { let sql = r"INSERT IGNORE INTO tasks (title, priority) VALUES ('Test Some Inserts', 1)"; match mysql_and_generic().verified_stmt(sql) { - Statement::Insert { + Statement::Insert(Insert { table_name, columns, source, on, ignore, .. - } => { + }) => { assert_eq!(ObjectName(vec![Ident::new("tasks")]), table_name); assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); assert!(on.is_none()); @@ -1305,14 +1305,14 @@ fn parse_priority_insert() { let sql = r"INSERT HIGH_PRIORITY INTO tasks (title, priority) VALUES ('Test Some Inserts', 1)"; match mysql_and_generic().verified_stmt(sql) { - Statement::Insert { + Statement::Insert(Insert { table_name, columns, source, on, priority, .. - } => { + }) => { assert_eq!(ObjectName(vec![Ident::new("tasks")]), table_name); assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); assert!(on.is_none()); @@ -1344,14 +1344,14 @@ fn parse_priority_insert() { let sql2 = r"INSERT LOW_PRIORITY INTO tasks (title, priority) VALUES ('Test Some Inserts', 1)"; match mysql().verified_stmt(sql2) { - Statement::Insert { + Statement::Insert(Insert { table_name, columns, source, on, priority, .. - } => { + }) => { assert_eq!(ObjectName(vec![Ident::new("tasks")]), table_name); assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); assert!(on.is_none()); @@ -1385,13 +1385,13 @@ fn parse_priority_insert() { fn parse_insert_as() { let sql = r"INSERT INTO `table` (`date`) VALUES ('2024-01-01') AS `alias`"; match mysql_and_generic().verified_stmt(sql) { - Statement::Insert { + Statement::Insert(Insert { table_name, columns, source, insert_alias, .. 
- } => { + }) => { assert_eq!( ObjectName(vec![Ident::with_quote('`', "table")]), table_name @@ -1435,13 +1435,13 @@ fn parse_insert_as() { let sql = r"INSERT INTO `table` (`id`, `date`) VALUES (1, '2024-01-01') AS `alias` (`mek_id`, `mek_date`)"; match mysql_and_generic().verified_stmt(sql) { - Statement::Insert { + Statement::Insert(Insert { table_name, columns, source, insert_alias, .. - } => { + }) => { assert_eq!( ObjectName(vec![Ident::with_quote('`', "table")]), table_name @@ -1491,7 +1491,7 @@ fn parse_insert_as() { fn parse_replace_insert() { let sql = r"REPLACE DELAYED INTO tasks (title, priority) VALUES ('Test Some Inserts', 1)"; match mysql().verified_stmt(sql) { - Statement::Insert { + Statement::Insert(Insert { table_name, columns, source, @@ -1499,7 +1499,7 @@ fn parse_replace_insert() { replace_into, priority, .. - } => { + }) => { assert_eq!(ObjectName(vec![Ident::new("tasks")]), table_name); assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); assert!(on.is_none()); @@ -1535,13 +1535,13 @@ fn parse_empty_row_insert() { let sql = "INSERT INTO tb () VALUES (), ()"; match mysql().one_statement_parses_to(sql, "INSERT INTO tb VALUES (), ()") { - Statement::Insert { + Statement::Insert(Insert { table_name, columns, source, on, .. - } => { + }) => { assert_eq!(ObjectName(vec![Ident::new("tb")]), table_name); assert!(columns.is_empty()); assert!(on.is_none()); @@ -1572,13 +1572,13 @@ fn parse_insert_with_on_duplicate_update() { let sql = "INSERT INTO permission_groups (name, description, perm_create, perm_read, perm_update, perm_delete) VALUES ('accounting_manager', 'Some description about the group', true, true, true, true) ON DUPLICATE KEY UPDATE description = VALUES(description), perm_create = VALUES(perm_create), perm_read = VALUES(perm_read), perm_update = VALUES(perm_update), perm_delete = VALUES(perm_delete)"; match mysql().verified_stmt(sql) { - Statement::Insert { + Statement::Insert(Insert { table_name, columns, source, on, .. - } => { + }) => { assert_eq!( ObjectName(vec![Ident::new("permission_groups")]), table_name @@ -1804,11 +1804,11 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { fn parse_insert_with_numeric_prefix_column_name() { let sql = "INSERT INTO s1.t1 (123col_$@length123) VALUES (67.654)"; match mysql().verified_stmt(sql) { - Statement::Insert { + Statement::Insert(Insert { table_name, columns, .. - } => { + }) => { assert_eq!( ObjectName(vec![Ident::new("s1"), Ident::new("t1")]), table_name @@ -1898,7 +1898,7 @@ fn parse_update_with_joins() { fn parse_delete_with_order_by() { let sql = "DELETE FROM customers ORDER BY id DESC"; match mysql().verified_stmt(sql) { - Statement::Delete { order_by, .. } => { + Statement::Delete(Delete { order_by, .. }) => { assert_eq!( vec![OrderByExpr { expr: Expr::Identifier(Ident { @@ -1919,7 +1919,7 @@ fn parse_delete_with_order_by() { fn parse_delete_with_limit() { let sql = "DELETE FROM customers LIMIT 100"; match mysql().verified_stmt(sql) { - Statement::Delete { limit, .. } => { + Statement::Delete(Delete { limit, .. }) => { assert_eq!(Some(Expr::Value(number("100"))), limit); } _ => unreachable!(), diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 6bb4bc69b..356651af4 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1482,12 +1482,12 @@ fn parse_prepare() { _ => unreachable!(), }; match sub_stmt.as_ref() { - Statement::Insert { + Statement::Insert(Insert { table_name, columns, source: Some(source), .. 
- } => { + }) => { assert_eq!(table_name.to_string(), "customers"); assert!(columns.is_empty()); @@ -1539,14 +1539,14 @@ fn parse_pg_on_conflict() { DO UPDATE SET dname = EXCLUDED.dname", ); match stmt { - Statement::Insert { + Statement::Insert(Insert { on: Some(OnInsert::OnConflict(OnConflict { conflict_target: Some(ConflictTarget::Columns(cols)), action, })), .. - } => { + }) => { assert_eq!(vec![Ident::from("did")], cols); assert_eq!( OnConflictAction::DoUpdate(DoUpdate { @@ -1569,14 +1569,14 @@ fn parse_pg_on_conflict() { DO UPDATE SET dname = EXCLUDED.dname, area = EXCLUDED.area", ); match stmt { - Statement::Insert { + Statement::Insert(Insert { on: Some(OnInsert::OnConflict(OnConflict { conflict_target: Some(ConflictTarget::Columns(cols)), action, })), .. - } => { + }) => { assert_eq!(vec![Ident::from("did"), Ident::from("area"),], cols); assert_eq!( OnConflictAction::DoUpdate(DoUpdate { @@ -1607,14 +1607,14 @@ fn parse_pg_on_conflict() { ON CONFLICT DO NOTHING", ); match stmt { - Statement::Insert { + Statement::Insert(Insert { on: Some(OnInsert::OnConflict(OnConflict { conflict_target: None, action, })), .. - } => { + }) => { assert_eq!(OnConflictAction::DoNothing, action); } _ => unreachable!(), @@ -1627,14 +1627,14 @@ fn parse_pg_on_conflict() { DO UPDATE SET dname = $1 WHERE dsize > $2", ); match stmt { - Statement::Insert { + Statement::Insert(Insert { on: Some(OnInsert::OnConflict(OnConflict { conflict_target: Some(ConflictTarget::Columns(cols)), action, })), .. - } => { + }) => { assert_eq!(vec![Ident::from("did")], cols); assert_eq!( OnConflictAction::DoUpdate(DoUpdate { @@ -1664,14 +1664,14 @@ fn parse_pg_on_conflict() { DO UPDATE SET dname = $1 WHERE dsize > $2", ); match stmt { - Statement::Insert { + Statement::Insert(Insert { on: Some(OnInsert::OnConflict(OnConflict { conflict_target: Some(ConflictTarget::OnConstraint(cname)), action, })), .. - } => { + }) => { assert_eq!(vec![Ident::from("distributors_did_pkey")], cname.0); assert_eq!( OnConflictAction::DoUpdate(DoUpdate { @@ -1701,7 +1701,7 @@ fn parse_pg_returning() { "INSERT INTO distributors (did, dname) VALUES (DEFAULT, 'XYZ Widgets') RETURNING did", ); match stmt { - Statement::Insert { returning, .. } => { + Statement::Insert(Insert { returning, .. }) => { assert_eq!( Some(vec![SelectItem::UnnamedExpr(Expr::Identifier( "did".into() @@ -1739,7 +1739,7 @@ fn parse_pg_returning() { let stmt = pg_and_generic().verified_stmt("DELETE FROM tasks WHERE status = 'DONE' RETURNING *"); match stmt { - Statement::Delete { returning, .. } => { + Statement::Delete(Delete { returning, .. 
}) => { assert_eq!( Some(vec![SelectItem::Wildcard( WildcardAdditionalOptions::default() @@ -3570,7 +3570,7 @@ fn test_simple_postgres_insert_with_alias() { assert_eq!( statement, - Statement::Insert { + Statement::Insert(Insert { or: None, ignore: false, into: true, @@ -3621,7 +3621,7 @@ fn test_simple_postgres_insert_with_alias() { replace_into: false, priority: None, insert_alias: None - } + }) ) } @@ -3634,7 +3634,7 @@ fn test_simple_postgres_insert_with_alias() { assert_eq!( statement, - Statement::Insert { + Statement::Insert(Insert { or: None, ignore: false, into: true, @@ -3688,7 +3688,7 @@ fn test_simple_postgres_insert_with_alias() { replace_into: false, priority: None, insert_alias: None - } + }) ) } @@ -3700,7 +3700,7 @@ fn test_simple_insert_with_quoted_alias() { assert_eq!( statement, - Statement::Insert { + Statement::Insert(Insert { or: None, ignore: false, into: true, @@ -3751,7 +3751,7 @@ fn test_simple_insert_with_quoted_alias() { replace_into: false, priority: None, insert_alias: None, - } + }) ) } From 39980e89765041c194acfbb7e347291c9ed7f730 Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Mon, 22 Apr 2024 13:17:50 -0700 Subject: [PATCH 32/42] Support Snowflake `MATCH_RECOGNIZE` syntax (#1222) --- src/ast/mod.rs | 16 +- src/ast/query.rs | 266 ++++++++++++++++++++++++++++++++++ src/dialect/generic.rs | 4 + src/dialect/mod.rs | 4 + src/dialect/snowflake.rs | 4 + src/keywords.rs | 11 ++ src/parser/mod.rs | 237 ++++++++++++++++++++++++++++++ src/test_utils.rs | 17 +++ tests/sqlparser_common.rs | 297 +++++++++++++++++++++++++++++++++++++- 9 files changed, 847 insertions(+), 9 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f02461e0e..32c7f6e9b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -40,13 +40,15 @@ pub use self::ddl::{ pub use self::dml::{Delete, Insert}; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ - Cte, CteAsMaterialized, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, ForClause, - ForJson, ForXml, GroupByExpr, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint, - JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType, - NamedWindowDefinition, NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem, - ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SetOperator, - SetQuantifier, Table, TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, - ValueTableMode, Values, WildcardAdditionalOptions, With, + AfterMatchSkip, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem, + ExcludeSelectItem, Fetch, ForClause, ForJson, ForXml, GroupByExpr, IdentWithAlias, + IlikeSelectItem, Join, JoinConstraint, JoinOperator, JsonTableColumn, + JsonTableColumnErrorHandling, LateralView, LockClause, LockType, MatchRecognizePattern, + MatchRecognizeSymbol, Measure, NamedWindowDefinition, NonBlock, Offset, OffsetRows, + OrderByExpr, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, + ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, + SetQuantifier, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion, TableWithJoins, + Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, }; pub use self::value::{ escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value, diff --git a/src/ast/query.rs b/src/ast/query.rs index 391ef51d8..5f5bca4cc 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -852,6 +852,238 
@@ pub enum TableFactor { columns: Vec, alias: Option, }, + /// A `MATCH_RECOGNIZE` operation on a table. + /// + /// See . + MatchRecognize { + table: Box, + /// `PARTITION BY [, ... ]` + partition_by: Vec, + /// `ORDER BY [, ... ]` + order_by: Vec, + /// `MEASURES [AS] [, ... ]` + measures: Vec, + /// `ONE ROW PER MATCH | ALL ROWS PER MATCH [
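For context, here is a minimal sketch of how a caller might exercise the MATCH_RECOGNIZE support added by this patch through the crate's existing `Parser::parse_sql` entry point with the Snowflake dialect. The query text below is illustrative only, loosely modeled on Snowflake's documented MATCH_RECOGNIZE syntax, and is not taken from this patch's test suite.

```rust
use sqlparser::dialect::SnowflakeDialect;
use sqlparser::parser::Parser;

fn main() {
    // Hypothetical query: find runs of price decreases followed by increases,
    // partitioned per symbol. Not one of the test cases in this patch.
    let sql = "SELECT * FROM stock_prices \
               MATCH_RECOGNIZE(\
                   PARTITION BY symbol \
                   ORDER BY tstamp \
                   MEASURES FIRST(price) AS start_price, LAST(price) AS end_price \
                   ONE ROW PER MATCH \
                   PATTERN (down+ up+) \
                   DEFINE down AS price < LAG(price), up AS price > LAG(price)\
               )";

    // Parser::parse_sql is the crate's existing public entry point; with this
    // patch applied, the FROM clause of the parsed statement should carry the
    // new TableFactor::MatchRecognize variant introduced above.
    let statements = Parser::parse_sql(&SnowflakeDialect {}, sql).unwrap();
    println!("{statements:?}");
}
```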