From 9ede9d3b25582638bd6c2cfca6ec8805e4f12071 Mon Sep 17 00:00:00 2001 From: gwenn Date: Thu, 9 May 2024 10:35:57 +0200 Subject: [PATCH 01/22] Check DISTINCT aggregates arguments --- src/lexer/sql/test.rs | 12 ++++++++++++ src/parser/ast/mod.rs | 24 ++++++++++++++++++++++++ src/parser/parse.y | 8 ++++---- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index 5280c71..bf35de5 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -324,6 +324,18 @@ fn cast_without_typename() { parse_cmd(b"SELECT CAST(a AS ) FROM t"); } +#[test] +fn distinct_aggregates() { + expect_parser_err_msg( + b"SELECT count(DISTINCT) FROM t", + "DISTINCT aggregates must have exactly one argument", + ); + expect_parser_err_msg( + b"SELECT count(DISTINCT a,b) FROM t", + "DISTINCT aggregates must have exactly one argument", + ); +} + #[test] fn unknown_table_option() { expect_parser_err_msg(b"CREATE TABLE t(x)o", "unknown table option: o"); diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index b2e2fa3..fbb9180 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -487,6 +487,30 @@ impl Expr { pub fn sub_query(query: Select) -> Expr { Expr::Subquery(Box::new(query)) } + /// Constructor + pub fn function_call( + xt: YYCODETYPE, + x: Token, + distinctness: Option, + args: Option>, + order_by: Option>, + filter_over: Option, + ) -> Result { + if let Some(Distinctness::Distinct) = distinctness { + if args.as_ref().map_or(0, |v| v.len()) != 1 { + return Err(custom_err!( + "DISTINCT aggregates must have exactly one argument" + )); + } + } + Ok(Expr::FunctionCall { + name: Id::from_token(xt, x), + distinctness, + args, + order_by, + filter_over, + }) + } } /// SQL literal diff --git a/src/parser/parse.y b/src/parser/parse.y index 15e287e..ee4c314 100644 --- a/src/parser/parse.y +++ b/src/parser/parse.y @@ -921,10 +921,10 @@ expr(A) ::= CAST LP expr(E) AS typetoken(T) RP. { %endif SQLITE_OMIT_CAST expr(A) ::= idj(X) LP distinct(D) exprlist(Y) RP. { - A = Expr::FunctionCall{ name: Id::from_token(@X, X), distinctness: D, args: Y, order_by: None, filter_over: None }; /*A-overwrites-X*/ + A = Expr::function_call(@X, X, D, Y, None, None)?; /*A-overwrites-X*/ } expr(A) ::= idj(X) LP distinct(D) exprlist(Y) ORDER BY sortlist(O) RP. { - A = Expr::FunctionCall{ name: Id::from_token(@X, X), distinctness: D, args: Y, order_by: Some(O), filter_over: None }; /*A-overwrites-X*/ + A = Expr::function_call(@X, X, D, Y, Some(O), None)?; /*A-overwrites-X*/ } expr(A) ::= idj(X) LP STAR RP. { A = Expr::FunctionCallStar{ name: Id::from_token(@X, X), filter_over: None }; /*A-overwrites-X*/ @@ -932,10 +932,10 @@ expr(A) ::= idj(X) LP STAR RP. { %ifndef SQLITE_OMIT_WINDOWFUNC expr(A) ::= idj(X) LP distinct(D) exprlist(Y) RP filter_over(Z). { - A = Expr::FunctionCall{ name: Id::from_token(@X, X), distinctness: D, args: Y, order_by: None, filter_over: Some(Z) }; /*A-overwrites-X*/ + A = Expr::function_call(@X, X, D, Y, None, Some(Z))?; /*A-overwrites-X*/ } expr(A) ::= idj(X) LP distinct(D) exprlist(Y) ORDER BY sortlist(O) RP filter_over(Z). { - A = Expr::FunctionCall{ name: Id::from_token(@X, X), distinctness: D, args: Y, order_by: Some(O), filter_over: Some(Z) }; /*A-overwrites-X*/ + A = Expr::function_call(@X, X, D, Y, Some(O), Some(Z))?; /*A-overwrites-X*/ } expr(A) ::= idj(X) LP STAR RP filter_over(Z). { A = Expr::FunctionCallStar{ name: Id::from_token(@X, X), filter_over: Some(Z) }; /*A-overwrites-X*/ From 230b36f1eb1fb717944ca70ef0b28985d55b5184 Mon Sep 17 00:00:00 2001 From: gwenn Date: Thu, 9 May 2024 11:25:41 +0200 Subject: [PATCH 02/22] Check CTE column count --- src/lexer/sql/test.rs | 9 +++++++++ src/parser/ast/mod.rs | 26 ++++++++++++++++++++++++++ src/parser/parse.y | 2 +- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index bf35de5..67763d8 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -336,6 +336,15 @@ fn distinct_aggregates() { ); } +#[test] +fn cte_column_count() { + expect_parser_err_msg( + b"WITH i(x, y) AS ( VALUES(1) ) + SELECT * FROM i;", + "table i has 1 values for 2 columns", + ) +} + #[test] fn unknown_table_option() { expect_parser_err_msg(b"CREATE TABLE t(x)o", "unknown table option: o"); diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index fbb9180..f47322f 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -1704,6 +1704,32 @@ pub struct CommonTableExpr { } impl CommonTableExpr { + /// Constructor + pub fn new( + tbl_name: Name, + columns: Option>, + materialized: Materialized, + select: Select, + ) -> Result { + if let Some(ref columns) = columns { + if let check::ColumnCount::Fixed(cc) = select.column_count() { + if cc != columns.len() { + return Err(custom_err!( + "table {} has {} values for {} columns", + tbl_name, + cc, + columns.len() + )); + } + } + } + Ok(CommonTableExpr { + tbl_name, + columns, + materialized, + select, + }) + } /// Constructor pub fn add_cte( ctes: &mut Vec, diff --git a/src/parser/parse.y b/src/parser/parse.y index ee4c314..ce6c027 100644 --- a/src/parser/parse.y +++ b/src/parser/parse.y @@ -1373,7 +1373,7 @@ wqas(A) ::= AS. {A = Materialized::Any;} wqas(A) ::= AS MATERIALIZED. {A = Materialized::Yes;} wqas(A) ::= AS NOT MATERIALIZED. {A = Materialized::No;} wqitem(A) ::= nm(X) eidlist_opt(Y) wqas(M) LP select(Z) RP. { - A = CommonTableExpr{ tbl_name: X, columns: Y, materialized: M, select: Z }; /*A-overwrites-X*/ + A = CommonTableExpr::new(X, Y, M, Z)?; /*A-overwrites-X*/ } wqlist(A) ::= wqitem(X). { A = vec![X]; /*A-overwrites-X*/ From 1da77dcfb2033a1f5f7dd948abab90dbcc0f4e15 Mon Sep 17 00:00:00 2001 From: gwenn Date: Thu, 9 May 2024 12:05:23 +0200 Subject: [PATCH 03/22] Check unknown join type --- src/lexer/sql/test.rs | 12 ++++++++++++ src/parser/ast/mod.rs | 8 ++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index 67763d8..2fde213 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -345,6 +345,18 @@ fn cte_column_count() { ) } +#[test] +fn unknown_join_type() { + expect_parser_err_msg( + b"SELECT * FROM t1 INNER OUTER JOIN t2;", + "unknown join type: INNER OUTER ", + ); + expect_parser_err_msg( + b"SELECT * FROM t1 LEFT BOGUS JOIN t2;", + "unknown join type: BOGUS", + ) +} + #[test] fn unknown_table_option() { expect_parser_err_msg(b"CREATE TABLE t(x)o", "unknown table option: o"); diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index f47322f..b51d2a7 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -920,10 +920,10 @@ impl JoinOperator { || (jt & (JoinType::OUTER | JoinType::LEFT | JoinType::RIGHT)) == JoinType::OUTER { return Err(custom_err!( - "unsupported JOIN type: {} {:?} {:?}", + "unknown join type: {} {} {}", t, - n1, - n2 + n1.as_ref().map_or("", |n| n.0.as_str()), + n2.as_ref().map_or("", |n| n.0.as_str()) )); } JoinOperator::TypedJoin(Some(jt)) @@ -977,7 +977,7 @@ impl TryFrom<&str> for JoinType { } else if "OUTER".eq_ignore_ascii_case(s) { Ok(JoinType::OUTER) } else { - Err(custom_err!("unsupported JOIN type: {}", s)) + Err(custom_err!("unknown join type: {}", s)) } } } From fc9748d83d82c213307c7a9a8f0853637f0bad61 Mon Sep 17 00:00:00 2001 From: gwenn Date: Thu, 9 May 2024 13:14:14 +0200 Subject: [PATCH 04/22] Check no tables specified --- src/lexer/sql/test.rs | 8 ++++++++ src/parser/ast/mod.rs | 28 ++++++++++++++++++++++++++++ src/parser/parse.y | 6 ++---- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index 2fde213..0eacea0 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -357,6 +357,14 @@ fn unknown_join_type() { ) } +#[test] +fn no_tables_specified() { + expect_parser_err_msg(b"SELECT *", "no tables specified"); + expect_parser_err_msg(b"SELECT t.*", "no tables specified"); + expect_parser_err_msg(b"SELECT count(*), *", "no tables specified"); + parse_cmd(b"SELECT count(*)"); +} + #[test] fn unknown_table_option() { expect_parser_err_msg(b"CREATE TABLE t(x)o", "unknown table option: o"); diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index b51d2a7..0d873b0 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -782,6 +782,34 @@ pub enum OneSelect { Values(Vec>), } +impl OneSelect { + /// Constructor + pub fn new( + distinctness: Option, + columns: Vec, + from: Option, + where_clause: Option, + group_by: Option, + window_clause: Option>, + ) -> Result { + if from.is_none() + && columns + .iter() + .any(|rc| matches!(rc, ResultColumn::Star | ResultColumn::TableStar(_))) + { + return Err(custom_err!("no tables specified")); + } + Ok(OneSelect::Select { + distinctness, + columns, + from, + where_clause, + group_by, + window_clause, + }) + } +} + /// `SELECT` ... `FROM` clause // https://sqlite.org/syntax/join-clause.html #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/src/parser/parse.y b/src/parser/parse.y index ce6c027..cbefb3d 100644 --- a/src/parser/parse.y +++ b/src/parser/parse.y @@ -530,14 +530,12 @@ multiselect_op(A) ::= INTERSECT. {A = CompoundOperator::Intersect;} oneselect(A) ::= SELECT distinct(D) selcollist(W) from(X) where_opt(Y) groupby_opt(P). { - A = OneSelect::Select{ distinctness: D, columns: W, from: X, where_clause: Y, - group_by: P, window_clause: None }; + A = OneSelect::new(D, W, X, Y, P, None)?; } %ifndef SQLITE_OMIT_WINDOWFUNC oneselect(A) ::= SELECT distinct(D) selcollist(W) from(X) where_opt(Y) groupby_opt(P) window_clause(R). { - A = OneSelect::Select{ distinctness: D, columns: W, from: X, where_clause: Y, - group_by: P, window_clause: Some(R) }; + A = OneSelect::new(D, W, X, Y, P, Some(R))?; } %endif From 879b4a869b4659c3489836ccf4c0d38a5c7f14f7 Mon Sep 17 00:00:00 2001 From: gwenn Date: Thu, 9 May 2024 13:54:24 +0200 Subject: [PATCH 05/22] Check UPDATE FROM target --- src/lexer/sql/test.rs | 12 +++++++++++ src/parser/ast/mod.rs | 50 +++++++++++++++++++++++++++++++++++++++++++ src/parser/parse.y | 8 +++---- 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index 0eacea0..b3da45d 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -365,6 +365,18 @@ fn no_tables_specified() { parse_cmd(b"SELECT count(*)"); } +#[test] +fn update_from_target() { + expect_parser_err_msg( + b"UPDATE x1 SET a=5 FROM x1", + "target object/alias may not appear in FROM clause", + ); + expect_parser_err_msg( + b"UPDATE x1 SET a=5 FROM x2, x1", + "target object/alias may not appear in FROM clause", + ); +} + #[test] fn unknown_table_option() { expect_parser_err_msg(b"CREATE TABLE t(x)o", "unknown table option: o"); diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 0d873b0..a4b7334 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -266,6 +266,56 @@ pub enum Stmt { Vacuum(Option, Option), } +impl Stmt { + /// constructor + pub fn update( + with: Option, + or_conflict: Option, + tbl_name: QualifiedName, + indexed: Option, + sets: Vec, // FIXME unique + from: Option, + where_clause: Option, + returning: Option>, + order_by: Option>, + limit: Option, + ) -> Result { + if let Some(FromClause { + select: Some(ref select), + ref joins, + .. + }) = from + { + if matches!(select.as_ref(), + SelectTable::Table(qn, _, _) | SelectTable::TableCall(qn, _, _) + if *qn == tbl_name) + || joins.as_ref().map_or(false, |js| js.iter().any(|j| + matches!(j.table, SelectTable::Table(ref qn, _, _) | SelectTable::TableCall(ref qn, _, _) + if *qn == tbl_name))) + { + return Err(custom_err!( + "target object/alias may not appear in FROM clause", + )); + } + } + if order_by.is_some() && limit.is_none() { + return Err(custom_err!("ORDER BY without LIMIT on UPDATE")); + } + Ok(Stmt::Update { + with, + or_conflict, + tbl_name, + indexed, + sets, + from, + where_clause, + returning, + order_by, + limit, + }) + } +} + /// SQL expression // https://sqlite.org/syntax/expr.html #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/src/parser/parse.y b/src/parser/parse.y index cbefb3d..2bdfb4d 100644 --- a/src/parser/parse.y +++ b/src/parser/parse.y @@ -793,15 +793,15 @@ where_opt_ret(A) ::= WHERE expr(X) RETURNING selcollist(Y). cmd ::= with(C) UPDATE orconf(R) xfullname(X) indexed_opt(I) SET setlist(Y) from(F) where_opt_ret(W) orderby_opt(O) limit_opt(L). { let (where_clause, returning) = W; - self.ctx.stmt = Some(Stmt::Update { with: C, or_conflict: R, tbl_name: X, indexed: I, sets: Y, from: F, - where_clause, returning, order_by: O, limit: L }); + self.ctx.stmt = Some(Stmt::update(C, R, X, I, Y, F, + where_clause, returning, O, L)?); } %else cmd ::= with(C) UPDATE orconf(R) xfullname(X) indexed_opt(I) SET setlist(Y) from(F) where_opt_ret(W). { let (where_clause, returning) = W; - self.ctx.stmt = Some(Stmt::Update { with: C, or_conflict: R, tbl_name: X, indexed: I, sets: Y, from: F, - where_clause, returning, order_by: None, limit: None }); + self.ctx.stmt = Some(Stmt::update(C, R, X, I, Y, F, + where_clause, returning, None, None)?); } %endif From 0003c2599f80b472c5c97e2113c422139690c603 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 11 May 2024 10:50:51 +0200 Subject: [PATCH 06/22] Check AUTOINCREMENT --- src/lexer/sql/test.rs | 9 +++++++++ src/parser/ast/mod.rs | 16 +++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index b3da45d..c4f4dc9 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -69,6 +69,15 @@ fn create_table_without_column() { ); } +#[test] +fn auto_increment() { + parse_cmd(b"CREATE TABLE t (x INTEGER PRIMARY KEY AUTOINCREMENT)"); + expect_parser_err_msg( + b"CREATE TABLE t (x TEXT PRIMARY KEY AUTOINCREMENT)", + "AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY", + ); +} + #[test] fn vtab_args() -> Result<(), Error> { let sql = b"CREATE VIRTUAL TABLE mail USING fts3( diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index a4b7334..88f5ed7 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -1373,7 +1373,21 @@ impl ColumnDefinition { } } for constraint in &cd.constraints { - if let ColumnConstraint::ForeignKey { + if let ColumnConstraint::PrimaryKey { + auto_increment: true, + .. + } = &constraint.constraint + { + if cd + .col_type + .as_ref() + .map_or(true, |t| !t.name.eq_ignore_ascii_case("INTEGER")) + { + return Err(custom_err!( + "AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY" + )); + } + } else if let ColumnConstraint::ForeignKey { clause: ForeignKeyClause { tbl_name, columns, .. From 298f4d045036c59a808fe967dbda319b3c892eeb Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 11 May 2024 11:10:04 +0200 Subject: [PATCH 07/22] Check GENERATED column --- src/lexer/sql/test.rs | 8 ++++++++ src/parser/ast/check.rs | 5 ----- src/parser/ast/mod.rs | 25 ++++++++++++++++--------- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index c4f4dc9..c0fd797 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -78,6 +78,14 @@ fn auto_increment() { ); } +#[test] +fn generated() { + expect_parser_err_msg( + b"CREATE TABLE x(a PRIMARY KEY AS ('id'));", + "generated columns cannot be part of the PRIMARY KEY", + ) +} + #[test] fn vtab_args() -> Result<(), Error> { let sql = b"CREATE VIRTUAL TABLE mail USING fts3( diff --git a/src/parser/ast/check.rs b/src/parser/ast/check.rs index 954bb27..b3b812a 100644 --- a/src/parser/ast/check.rs +++ b/src/parser/ast/check.rs @@ -173,11 +173,6 @@ impl Stmt { body: InsertBody::DefaultValues, .. } => Err(custom_err!("0 values for {} columns", columns.len())), - Stmt::Update { - order_by: Some(_), - limit: None, - .. - } => Err(custom_err!("ORDER BY without LIMIT on UPDATE")), _ => Ok(()), } } diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 88f5ed7..13b3b80 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -268,6 +268,7 @@ pub enum Stmt { impl Stmt { /// constructor + #[allow(clippy::too_many_arguments)] pub fn update( with: Option, or_conflict: Option, @@ -1372,21 +1373,20 @@ impl ColumnDefinition { col_type.name = new_type.join(" "); } } + let (mut primary_key, mut generated) = (false, false); for constraint in &cd.constraints { - if let ColumnConstraint::PrimaryKey { - auto_increment: true, - .. - } = &constraint.constraint - { - if cd - .col_type - .as_ref() - .map_or(true, |t| !t.name.eq_ignore_ascii_case("INTEGER")) + if let ColumnConstraint::PrimaryKey { auto_increment, .. } = &constraint.constraint { + if *auto_increment + && cd + .col_type + .as_ref() + .map_or(true, |t| !t.name.eq_ignore_ascii_case("INTEGER")) { return Err(custom_err!( "AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY" )); } + primary_key = true; } else if let ColumnConstraint::ForeignKey { clause: ForeignKeyClause { @@ -1403,8 +1403,15 @@ impl ColumnDefinition { tbl_name )); } + } else if let ColumnConstraint::Generated { .. } = &constraint.constraint { + generated = true; } } + if primary_key && generated { + return Err(custom_err!( + "generated columns cannot be part of the PRIMARY KEY" + )); + } columns.insert(col_name.clone(), cd); Ok(()) } From df637dcfff37aca75454dd9e82fd4a064bbe5de3 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 11 May 2024 11:18:45 +0200 Subject: [PATCH 08/22] Check GENERATED column --- src/lexer/sql/test.rs | 6 +++++- src/parser/ast/mod.rs | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index c0fd797..ec00d36 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -81,8 +81,12 @@ fn auto_increment() { #[test] fn generated() { expect_parser_err_msg( - b"CREATE TABLE x(a PRIMARY KEY AS ('id'));", + b"CREATE TABLE x(a PRIMARY KEY AS ('id'))", "generated columns cannot be part of the PRIMARY KEY", + ); + expect_parser_err_msg( + b"CREATE TABLE x(a AS ('id') DEFAULT '')", + "cannot use DEFAULT on a generated column", ) } diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 13b3b80..b252d5c 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -1373,7 +1373,7 @@ impl ColumnDefinition { col_type.name = new_type.join(" "); } } - let (mut primary_key, mut generated) = (false, false); + let (mut primary_key, mut generated, mut default) = (false, false, false); for constraint in &cd.constraints { if let ColumnConstraint::PrimaryKey { auto_increment, .. } = &constraint.constraint { if *auto_increment @@ -1405,12 +1405,16 @@ impl ColumnDefinition { } } else if let ColumnConstraint::Generated { .. } = &constraint.constraint { generated = true; + } else if let ColumnConstraint::Default(..) = &constraint.constraint { + default = true; } } if primary_key && generated { return Err(custom_err!( "generated columns cannot be part of the PRIMARY KEY" )); + } else if default && generated { + return Err(custom_err!("cannot use DEFAULT on a generated column")); } columns.insert(col_name.clone(), cd); Ok(()) From 50a74ac206bbcc521ad36c6b34442409165369f3 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 11 May 2024 13:17:18 +0200 Subject: [PATCH 09/22] cannot use RETURNING in a trigger --- src/lexer/sql/test.rs | 5 +++++ src/parser/parse.y | 13 ++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index ec00d36..15cc8b6 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -433,6 +433,11 @@ fn indexed_by_clause_within_triggers() { ); } +#[test] +fn returning_within_trigger() { + expect_parser_err_msg(b"CREATE TRIGGER t AFTER DELETE ON x BEGIN INSERT INTO x (a) VALUES ('x') RETURNING rowid; END;", "cannot use RETURNING in a trigger"); +} + fn expect_parser_err_msg(input: &[u8], error_msg: &str) { expect_parser_err(input, ParserError::Custom(error_msg.to_owned())) } diff --git a/src/parser/parse.y b/src/parser/parse.y index 2bdfb4d..4c3aff1 100644 --- a/src/parser/parse.y +++ b/src/parser/parse.y @@ -605,8 +605,8 @@ from(A) ::= FROM seltablist(X). { // in a SELECT statement. "stl_prefix" is a prefix of this list. // stl_prefix(A) ::= seltablist(A) joinop(Y). { - let op = Y; - A.push_op(op); + let op = Y; + A.push_op(op); } stl_prefix(A) ::= . {A = FromClause::empty();} seltablist(A) ::= stl_prefix(A) fullname(Y) as(Z) indexed_opt(I) @@ -650,10 +650,10 @@ xfullname(A) ::= nm(X). xfullname(A) ::= nm(X) DOT nm(Y). {A = QualifiedName::fullname(X, Y); /*A-overwrites-X*/} xfullname(A) ::= nm(X) DOT nm(Y) AS nm(Z). { - A = QualifiedName::xfullname(X, Y, Z); /*A-overwrites-X*/ + A = QualifiedName::xfullname(X, Y, Z); /*A-overwrites-X*/ } xfullname(A) ::= nm(X) AS nm(Z). { - A = QualifiedName::alias(X, Z); /*A-overwrites-X*/ + A = QualifiedName::alias(X, Z); /*A-overwrites-X*/ } %type joinop {JoinOperator} @@ -1244,7 +1244,10 @@ trigger_cmd(A) ::= trigger_cmd(A) ::= insert_cmd(R) INTO trnm(X) idlist_opt(F) select(S) upsert(U). { let (upsert, returning) = U; - A = TriggerCmd::Insert{ or_conflict: R, tbl_name: X, col_names: F, select: S, upsert, returning };/*A-overwrites-R*/ + if returning.is_some() { + return Err(custom_err!("cannot use RETURNING in a trigger")); + } + A = TriggerCmd::Insert{ or_conflict: R, tbl_name: X, col_names: F, select: S, upsert, returning };/*A-overwrites-R*/ } // DELETE trigger_cmd(A) ::= DELETE FROM trnm(X) tridxby where_opt(Y). From e9906d5c54f5ec5af016ffd974d7cb451f638f4e Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 11 May 2024 18:38:37 +0200 Subject: [PATCH 10/22] Check object name reserved for internal use --- src/lexer/sql/test.rs | 20 ++++++++++++++++++++ src/parser/ast/check.rs | 14 ++++++++++++++ src/parser/ast/mod.rs | 8 ++++++++ 3 files changed, 42 insertions(+) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index 15cc8b6..6cc7435 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -438,6 +438,26 @@ fn returning_within_trigger() { expect_parser_err_msg(b"CREATE TRIGGER t AFTER DELETE ON x BEGIN INSERT INTO x (a) VALUES ('x') RETURNING rowid; END;", "cannot use RETURNING in a trigger"); } +#[test] +fn reserved_name() { + expect_parser_err_msg( + b"CREATE TABLE sqlite_x(a)", + "object name reserved for internal use: sqlite_x", + ); + expect_parser_err_msg( + b"CREATE VIEW sqlite_x(a) AS SELECT 1", + "object name reserved for internal use: sqlite_x", + ); + expect_parser_err_msg( + b"CREATE INDEX sqlite_x ON x(a)", + "object name reserved for internal use: sqlite_x", + ); + expect_parser_err_msg( + b"CREATE TRIGGER sqlite_x AFTER INSERT ON x BEGIN SELECT 1; END;", + "object name reserved for internal use: sqlite_x", + ); +} + fn expect_parser_err_msg(input: &[u8], error_msg: &str) { expect_parser_err(input, ParserError::Custom(error_msg.to_owned())) } diff --git a/src/parser/ast/check.rs b/src/parser/ast/check.rs index b3b812a..e16ac9d 100644 --- a/src/parser/ast/check.rs +++ b/src/parser/ast/check.rs @@ -113,12 +113,14 @@ impl Stmt { } Ok(()) } + Stmt::CreateIndex { idx_name, .. } => check_reserved_name(idx_name), Stmt::CreateTable { temporary, tbl_name, body, .. } => { + check_reserved_name(tbl_name)?; if *temporary { if let Some(ref db_name) = tbl_name.db_name { if db_name != "TEMP" { @@ -128,12 +130,14 @@ impl Stmt { } body.check(tbl_name) } + Stmt::CreateTrigger { trigger_name, .. } => check_reserved_name(trigger_name), Stmt::CreateView { view_name, columns: Some(columns), select, .. } => { + check_reserved_name(view_name)?; // SQLite3 engine renames duplicates: for (i, c) in columns.iter().enumerate() { for o in &columns[i + 1..] { @@ -178,6 +182,16 @@ impl Stmt { } } +fn check_reserved_name(name: &QualifiedName) -> Result<(), ParserError> { + if name.name.is_reserved() { + return Err(custom_err!( + "object name reserved for internal use: {}", + name.name + )); + } + Ok(()) +} + impl CreateTableBody { /// check for extra rules pub fn check(&self, tbl_name: &QualifiedName) -> Result<(), ParserError> { diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index b252d5c..939a552 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -1121,6 +1121,14 @@ impl Name { } QuotedIterator(sub.bytes(), quote) } + + fn is_reserved(&self) -> bool { + let bytes = self.as_bytes(); + let reserved = QuotedIterator("sqlite_".bytes(), 0); + bytes + .zip(reserved) + .all(|(b1, b2)| b1.eq_ignore_ascii_case(&b2)) + } } struct QuotedIterator<'s>(Bytes<'s>, u8); From e05850dfe22457acf0e13977998431c06500f194 Mon Sep 17 00:00:00 2001 From: gwenn Date: Mon, 20 May 2024 09:10:38 +0200 Subject: [PATCH 11/22] Introduce ColFlags --- src/parser/ast/check.rs | 22 ++--- src/parser/ast/mod.rs | 188 +++++++++++++++++++++++++--------------- src/parser/parse.y | 6 +- 3 files changed, 131 insertions(+), 85 deletions(-) diff --git a/src/parser/ast/check.rs b/src/parser/ast/check.rs index e16ac9d..f6df992 100644 --- a/src/parser/ast/check.rs +++ b/src/parser/ast/check.rs @@ -104,12 +104,10 @@ impl Stmt { Ok(()) } Stmt::AlterTable(.., AlterTableBody::AddColumn(cd)) => { - for c in cd { - if let ColumnConstraint::PrimaryKey { .. } = c { - return Err(custom_err!("Cannot add a PRIMARY KEY column")); - } else if let ColumnConstraint::Unique(..) = c { - return Err(custom_err!("Cannot add a UNIQUE column")); - } + if cd.col_flags.contains(ColFlags::PRIMKEY) { + return Err(custom_err!("Cannot add a PRIMARY KEY column")); + } else if cd.col_flags.contains(ColFlags::UNIQUE) { + return Err(custom_err!("Cannot add a UNIQUE column")); } Ok(()) } @@ -203,10 +201,8 @@ impl CreateTableBody { { let mut generated_count = 0; for c in columns.values() { - for cs in c.constraints.iter() { - if let ColumnConstraint::Generated { .. } = cs.constraint { - generated_count += 1; - } + if c.col_flags.intersects(ColFlags::GENERATED) { + generated_count += 1; } } if generated_count == columns.len() { @@ -260,10 +256,8 @@ impl CreateTableBody { } = self { for col in columns.values() { - for c in col { - if let ColumnConstraint::PrimaryKey { .. } = c { - return true; - } + if col.col_flags.contains(ColFlags::PRIMKEY) { + return true; } } if let Some(constraints) = constraints { diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 939a552..0875ee0 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -1331,6 +1331,35 @@ impl CreateTableBody { } } +bitflags::bitflags! { + /// Column definition flags + #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] + pub struct ColFlags: u16 { + /// Column is part of the primary key + const PRIMKEY = 0x0001; + // A hidden column in a virtual table + //const HIDDEN = 0x0002; + /// Type name follows column name + const HASTYPE = 0x0004; + /// Column def contains "UNIQUE" or "PK" + const UNIQUE = 0x0008; + //const SORTERREF = 0x0010; /* Use sorter-refs with this column */ + /// GENERATED ALWAYS AS ... VIRTUAL + const VIRTUAL = 0x0020; + /// GENERATED ALWAYS AS ... STORED + const STORED = 0x0040; + //const NOTAVAIL = 0x0080; /* STORED column not yet calculated */ + //const BUSY = 0x0100; /* Blocks recursion on GENERATED columns */ + /// Has collating sequence name in zCnName + const HASCOLL = 0x0200; + //const NOEXPAND = 0x0400; /* Omit this column when expanding "*" */ + /// Combo: STORED, VIRTUAL + const GENERATED = Self::STORED.bits() | Self::VIRTUAL.bits(); + // Combo: HIDDEN, STORED, VIRTUAL + //const NOINSERT = Self::HIDDEN.bits() | Self::STORED.bits() | Self::VIRTUAL.bits(); + } +} + /// Table column definition // https://sqlite.org/syntax/column-def.html #[derive(Clone, Debug, PartialEq, Eq)] @@ -1341,89 +1370,112 @@ pub struct ColumnDefinition { pub col_type: Option, /// column constraints pub constraints: Vec, + /// column flags + pub col_flags: ColFlags, } impl ColumnDefinition { /// Constructor - pub fn add_column( - columns: &mut IndexMap, - mut cd: ColumnDefinition, - ) -> Result<(), ParserError> { - let col_name = &cd.col_name; - if columns.contains_key(col_name) { - // TODO unquote - return Err(custom_err!("duplicate column name: {}", col_name)); - } - // https://github.com/sqlite/sqlite/blob/e452bf40a14aca57fd9047b330dff282f3e4bbcc/src/build.c#L1511-L1514 - if let Some(ref mut col_type) = cd.col_type { - let mut split = col_type.name.split_ascii_whitespace(); - let truncate = if split - .next_back() - .map_or(false, |s| s.eq_ignore_ascii_case("ALWAYS")) - && split - .next_back() - .map_or(false, |s| s.eq_ignore_ascii_case("GENERATED")) - { - let mut generated = false; - for constraint in &cd.constraints { - if let ColumnConstraint::Generated { .. } = constraint.constraint { - generated = true; - break; + pub fn new( + col_name: Name, + mut col_type: Option, + constraints: Vec, + ) -> Result { + let mut col_flags = ColFlags::empty(); + let mut default = false; + for constraint in &constraints { + match &constraint.constraint { + ColumnConstraint::Default(..) => { + default = true; + } + ColumnConstraint::Collate { .. } => { + col_flags |= ColFlags::HASCOLL; + } + ColumnConstraint::Generated { typ, .. } => { + col_flags |= ColFlags::VIRTUAL; + if let Some(id) = typ { + if id.0.eq_ignore_ascii_case("STORED") { + col_flags |= ColFlags::STORED; + } } } - generated - } else { - false - }; - if truncate { - // str_split_whitespace_remainder - let new_type: Vec<&str> = split.collect(); - col_type.name = new_type.join(" "); - } - } - let (mut primary_key, mut generated, mut default) = (false, false, false); - for constraint in &cd.constraints { - if let ColumnConstraint::PrimaryKey { auto_increment, .. } = &constraint.constraint { - if *auto_increment - && cd - .col_type - .as_ref() - .map_or(true, |t| !t.name.eq_ignore_ascii_case("INTEGER")) - { - return Err(custom_err!( - "AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY" - )); + ColumnConstraint::ForeignKey { + clause: + ForeignKeyClause { + tbl_name, columns, .. + }, + .. + } => { + // The child table may reference the primary key of the parent without specifying the primary key column + if columns.as_ref().map_or(0, |v| v.len()) > 1 { + return Err(custom_err!( + "foreign key on {} should reference only one column of table {}", + col_name, + tbl_name + )); + } } - primary_key = true; - } else if let ColumnConstraint::ForeignKey { - clause: - ForeignKeyClause { - tbl_name, columns, .. - }, - .. - } = &constraint.constraint - { - // The child table may reference the primary key of the parent without specifying the primary key column - if columns.as_ref().map_or(0, |v| v.len()) > 1 { - return Err(custom_err!( - "foreign key on {} should reference only one column of table {}", - col_name, - tbl_name - )); + ColumnConstraint::PrimaryKey { auto_increment, .. } => { + if *auto_increment + && col_type + .as_ref() + .map_or(true, |t| !t.name.eq_ignore_ascii_case("INTEGER")) + { + return Err(custom_err!( + "AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY" + )); + } + col_flags |= ColFlags::PRIMKEY | ColFlags::UNIQUE; + } + ColumnConstraint::Unique(..) => { + col_flags |= ColFlags::UNIQUE; } - } else if let ColumnConstraint::Generated { .. } = &constraint.constraint { - generated = true; - } else if let ColumnConstraint::Default(..) = &constraint.constraint { - default = true; + _ => {} } } - if primary_key && generated { + if col_flags.contains(ColFlags::PRIMKEY) && col_flags.intersects(ColFlags::GENERATED) { return Err(custom_err!( "generated columns cannot be part of the PRIMARY KEY" )); - } else if default && generated { + } else if default && col_flags.intersects(ColFlags::GENERATED) { return Err(custom_err!("cannot use DEFAULT on a generated column")); } + if col_flags.intersects(ColFlags::GENERATED) { + // https://github.com/sqlite/sqlite/blob/e452bf40a14aca57fd9047b330dff282f3e4bbcc/src/build.c#L1511-L1514 + if let Some(ref mut col_type) = col_type { + let mut split = col_type.name.split_ascii_whitespace(); + if split + .next_back() + .map_or(false, |s| s.eq_ignore_ascii_case("ALWAYS")) + && split + .next_back() + .map_or(false, |s| s.eq_ignore_ascii_case("GENERATED")) + { + // str_split_whitespace_remainder + let new_type: Vec<&str> = split.collect(); + col_type.name = new_type.join(" "); + } + } + } + if col_type.as_ref().map_or(false, |t| !t.name.is_empty()) { + col_flags |= ColFlags::HASTYPE; + } + Ok(ColumnDefinition { + col_name, + col_type, + constraints, + col_flags, + }) + } + /// Constructor + pub fn add_column( + columns: &mut IndexMap, + cd: ColumnDefinition, + ) -> Result<(), ParserError> { + let col_name = &cd.col_name; + if columns.contains_key(col_name) { + return Err(custom_err!("duplicate column name: {}", col_name)); + } columns.insert(col_name.clone(), cd); Ok(()) } diff --git a/src/parser/parse.y b/src/parser/parse.y index 4c3aff1..f368122 100644 --- a/src/parser/parse.y +++ b/src/parser/parse.y @@ -160,12 +160,12 @@ table_option(A) ::= nm(X). { %type columnlist {IndexMap} columnlist(A) ::= columnlist(A) COMMA columnname(X) carglist(Y). { let col = X; - let cd = ColumnDefinition{ col_name: col.0, col_type: col.1, constraints: Y }; + let cd = ColumnDefinition::new(col.0, col.1, Y)?; ColumnDefinition::add_column(A, cd)?; } columnlist(A) ::= columnname(X) carglist(Y). { let col = X; - let cd = ColumnDefinition{ col_name: col.0, col_type: col.1, constraints: Y }; + let cd = ColumnDefinition::new(col.0, col.1, Y)?; let mut map = IndexMap::new(); ColumnDefinition::add_column(&mut map, cd)?; A = map; @@ -1316,7 +1316,7 @@ cmd ::= ALTER TABLE fullname(X) RENAME TO nm(Z). { cmd ::= ALTER TABLE fullname(X) ADD kwcolumn_opt columnname(Y) carglist(C). { let (col_name, col_type) = Y; - let cd = ColumnDefinition{ col_name, col_type, constraints: C }; + let cd = ColumnDefinition::new(col_name, col_type, C)?; self.ctx.stmt = Some(Stmt::AlterTable(X, AlterTableBody::AddColumn(cd))); } cmd ::= ALTER TABLE fullname(X) RENAME kwcolumn_opt nm(Y) TO nm(Z). { From c9b26caf3d3f242754ce6500af852f2b36dac12c Mon Sep 17 00:00:00 2001 From: gwenn Date: Mon, 20 May 2024 10:49:44 +0200 Subject: [PATCH 12/22] Introduce TabFlags --- src/parser/ast/check.rs | 16 +++---- src/parser/ast/fmt.rs | 6 +-- src/parser/ast/mod.rs | 92 ++++++++++++++++++++++++++++------------- src/parser/parse.y | 10 ++--- 4 files changed, 79 insertions(+), 45 deletions(-) diff --git a/src/parser/ast/check.rs b/src/parser/ast/check.rs index f6df992..d7e5a26 100644 --- a/src/parser/ast/check.rs +++ b/src/parser/ast/check.rs @@ -104,9 +104,9 @@ impl Stmt { Ok(()) } Stmt::AlterTable(.., AlterTableBody::AddColumn(cd)) => { - if cd.col_flags.contains(ColFlags::PRIMKEY) { + if cd.flags.contains(ColFlags::PRIMKEY) { return Err(custom_err!("Cannot add a PRIMARY KEY column")); - } else if cd.col_flags.contains(ColFlags::UNIQUE) { + } else if cd.flags.contains(ColFlags::UNIQUE) { return Err(custom_err!("Cannot add a UNIQUE column")); } Ok(()) @@ -195,13 +195,13 @@ impl CreateTableBody { pub fn check(&self, tbl_name: &QualifiedName) -> Result<(), ParserError> { if let CreateTableBody::ColumnsAndConstraints { columns, - constraints: _, - options, + constraints, + flags, } = self { let mut generated_count = 0; for c in columns.values() { - if c.col_flags.intersects(ColFlags::GENERATED) { + if c.flags.intersects(ColFlags::GENERATED) { generated_count += 1; } } @@ -209,7 +209,7 @@ impl CreateTableBody { return Err(custom_err!("must have at least one non-generated column")); } - if options.contains(TableOptions::STRICT) { + if flags.contains(TabFlags::Strict) { for c in columns.values() { match &c.col_type { Some(Type { name, .. }) => { @@ -240,7 +240,7 @@ impl CreateTableBody { } } } - if options.contains(TableOptions::WITHOUT_ROWID) && !self.has_primary_key() { + if flags.contains(TabFlags::WithoutRowid) && !self.has_primary_key() { return Err(custom_err!("PRIMARY KEY missing on table {}", tbl_name,)); } } @@ -256,7 +256,7 @@ impl CreateTableBody { } = self { for col in columns.values() { - if col.col_flags.contains(ColFlags::PRIMKEY) { + if col.flags.contains(ColFlags::PRIMKEY) { return true; } } diff --git a/src/parser/ast/fmt.rs b/src/parser/ast/fmt.rs index 350e739..cdcb97f 100644 --- a/src/parser/ast/fmt.rs +++ b/src/parser/ast/fmt.rs @@ -1188,7 +1188,7 @@ impl ToTokens for CreateTableBody { CreateTableBody::ColumnsAndConstraints { columns, constraints, - options, + flags, } => { s.append(TK_LP, None)?; comma(columns.values(), s)?; @@ -1197,11 +1197,11 @@ impl ToTokens for CreateTableBody { comma(constraints, s)?; } s.append(TK_RP, None)?; - if options.contains(TableOptions::WITHOUT_ROWID) { + if flags.contains(TabFlags::WithoutRowid) { s.append(TK_WITHOUT, None)?; s.append(TK_ID, Some("ROWID"))?; } - if options.contains(TableOptions::STRICT) { + if flags.contains(TabFlags::Strict) { s.append(TK_ID, Some("STRICT"))?; } Ok(()) diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 0875ee0..d0f5bde 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -1298,6 +1298,42 @@ pub enum AlterTableBody { DropColumn(Name), // TODO distinction between DROP and DROP COLUMN } +bitflags::bitflags! { + /// `CREATE TABLE` flags + #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] + pub struct TabFlags: u32 { + //const TF_Readonly = 0x00000001; // Read-only system table + /// Has one or more hidden columns + const HasHidden = 0x00000002; + /// Table has a primary key + const HasPrimaryKey = 0x00000004; + /// Integer primary key is autoincrement + const Autoincrement = 0x00000008; + //const TF_HasStat1 = 0x00000010; // nRowLogEst set from sqlite_stat1 + /// Has one or more VIRTUAL columns + const HasVirtual = 0x00000020; + /// Has one or more STORED columns + const HasStored = 0x00000040; + /// Combo: HasVirtual + HasStored + const HasGenerated = 0x00000060; + /// No rowid. PRIMARY KEY is the key + const WithoutRowid = 0x00000080; + //const TF_MaybeReanalyze = 0x00000100; // Maybe run ANALYZE on this table + // No user-visible "rowid" column + //const NoVisibleRowid = 0x00000200; + // Out-of-Order hidden columns + //const OOOHidden = 0x00000400; + /// Contains NOT NULL constraints + const HasNotNull = 0x00000800; + //const Shadow = 0x00001000; // True for a shadow table + //const TF_HasStat4 = 0x00002000; // STAT4 info available for this table + //const Ephemeral = 0x00004000; // An ephemeral table + //const TF_Eponymous = 0x00008000; // An eponymous virtual table + /// STRICT mode + const Strict = 0x00010000; + } +} + /// `CREATE TABLE` body // https://sqlite.org/lang_createtable.html // https://sqlite.org/syntax/create-table-stmt.html @@ -1309,8 +1345,8 @@ pub enum CreateTableBody { columns: IndexMap, /// table constraints constraints: Option>, - /// table options - options: TableOptions, + /// table flags + flags: TabFlags, }, /// `AS` select AsSelect(Select), @@ -1321,12 +1357,17 @@ impl CreateTableBody { pub fn columns_and_constraints( columns: IndexMap, constraints: Option>, - options: TableOptions, + mut flags: TabFlags, ) -> Result { + for col in columns.values() { + if col.flags.contains(ColFlags::PRIMKEY) { + flags |= TabFlags::HasPrimaryKey; + } + } Ok(CreateTableBody::ColumnsAndConstraints { columns, constraints, - options, + flags, }) } } @@ -1371,7 +1412,7 @@ pub struct ColumnDefinition { /// column constraints pub constraints: Vec, /// column flags - pub col_flags: ColFlags, + pub flags: ColFlags, } impl ColumnDefinition { @@ -1381,7 +1422,7 @@ impl ColumnDefinition { mut col_type: Option, constraints: Vec, ) -> Result { - let mut col_flags = ColFlags::empty(); + let mut flags = ColFlags::empty(); let mut default = false; for constraint in &constraints { match &constraint.constraint { @@ -1389,13 +1430,13 @@ impl ColumnDefinition { default = true; } ColumnConstraint::Collate { .. } => { - col_flags |= ColFlags::HASCOLL; + flags |= ColFlags::HASCOLL; } ColumnConstraint::Generated { typ, .. } => { - col_flags |= ColFlags::VIRTUAL; + flags |= ColFlags::VIRTUAL; if let Some(id) = typ { if id.0.eq_ignore_ascii_case("STORED") { - col_flags |= ColFlags::STORED; + flags |= ColFlags::STORED; } } } @@ -1425,22 +1466,22 @@ impl ColumnDefinition { "AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY" )); } - col_flags |= ColFlags::PRIMKEY | ColFlags::UNIQUE; + flags |= ColFlags::PRIMKEY | ColFlags::UNIQUE; } ColumnConstraint::Unique(..) => { - col_flags |= ColFlags::UNIQUE; + flags |= ColFlags::UNIQUE; } _ => {} } } - if col_flags.contains(ColFlags::PRIMKEY) && col_flags.intersects(ColFlags::GENERATED) { + if flags.contains(ColFlags::PRIMKEY) && flags.intersects(ColFlags::GENERATED) { return Err(custom_err!( "generated columns cannot be part of the PRIMARY KEY" )); - } else if default && col_flags.intersects(ColFlags::GENERATED) { + } else if default && flags.intersects(ColFlags::GENERATED) { return Err(custom_err!("cannot use DEFAULT on a generated column")); } - if col_flags.intersects(ColFlags::GENERATED) { + if flags.intersects(ColFlags::GENERATED) { // https://github.com/sqlite/sqlite/blob/e452bf40a14aca57fd9047b330dff282f3e4bbcc/src/build.c#L1511-L1514 if let Some(ref mut col_type) = col_type { let mut split = col_type.name.split_ascii_whitespace(); @@ -1458,13 +1499,13 @@ impl ColumnDefinition { } } if col_type.as_ref().map_or(false, |t| !t.name.is_empty()) { - col_flags |= ColFlags::HASTYPE; + flags |= ColFlags::HASTYPE; } Ok(ColumnDefinition { col_name, col_type, constraints, - col_flags, + flags, }) } /// Constructor @@ -1475,6 +1516,12 @@ impl ColumnDefinition { let col_name = &cd.col_name; if columns.contains_key(col_name) { return Err(custom_err!("duplicate column name: {}", col_name)); + } else if cd.flags.contains(ColFlags::PRIMKEY) + && columns + .values() + .any(|c| c.flags.contains(ColFlags::PRIMKEY)) + { + return Err(custom_err!("table has more than one primary key")); // FIXME table name } columns.insert(col_name.clone(), cd); Ok(()) @@ -1583,19 +1630,6 @@ pub enum TableConstraint { }, } -bitflags::bitflags! { - /// `CREATE TABLE` options - #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] - pub struct TableOptions: u8 { - /// None - const NONE = 0; - /// `WITHOUT ROWID` - const WITHOUT_ROWID = 1; - /// `STRICT` - const STRICT = 2; - } -} - /// Sort orders #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum SortOrder { diff --git a/src/parser/parse.y b/src/parser/parse.y index f368122..bf29c6c 100644 --- a/src/parser/parse.y +++ b/src/parser/parse.y @@ -136,15 +136,15 @@ create_table_args(A) ::= LP columnlist(C) conslist_opt(X) RP table_option_set(F) create_table_args(A) ::= AS select(S). { A = CreateTableBody::AsSelect(S); } -%type table_option_set {TableOptions} -%type table_option {TableOptions} -table_option_set(A) ::= . {A = TableOptions::NONE;} +%type table_option_set {TabFlags} +%type table_option {TabFlags} +table_option_set(A) ::= . {A = TabFlags::empty();} table_option_set(A) ::= table_option(A). table_option_set(A) ::= table_option_set(X) COMMA table_option(Y). {A = X|Y;} table_option(A) ::= WITHOUT nm(X). { let option = X; if option == "rowid" { - A = TableOptions::WITHOUT_ROWID; + A = TabFlags::WithoutRowid; }else{ return Err(custom_err!("unknown table option: {}", option)); } @@ -152,7 +152,7 @@ table_option(A) ::= WITHOUT nm(X). { table_option(A) ::= nm(X). { let option = X; if option == "strict" { - A = TableOptions::STRICT; + A = TabFlags::Strict; }else{ return Err(custom_err!("unknown table option: {}", option)); } From b1afe0619ece6780b3d578900b100889c1cf4ef3 Mon Sep 17 00:00:00 2001 From: gwenn Date: Mon, 20 May 2024 16:03:54 +0200 Subject: [PATCH 13/22] Fix is_reserved implementation --- examples/sql_tokens.rs | 2 +- src/lexer/sql/test.rs | 2 ++ src/parser/ast/mod.rs | 8 ++++---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/examples/sql_tokens.rs b/examples/sql_tokens.rs index 4d49389..1fc76fb 100644 --- a/examples/sql_tokens.rs +++ b/examples/sql_tokens.rs @@ -50,7 +50,7 @@ fn main() { if res.is_err() { eprintln!("Err: {} in {}", res.unwrap_err(), arg); }*/ - debug_assert!(token.iter().all(|b| b.is_ascii_digit())) + debug_assert!(token.iter().all(|b| b.is_ascii_digit() || *b == b'_')) } } TK_FLOAT => { diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index 6cc7435..48394b4 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -456,6 +456,8 @@ fn reserved_name() { b"CREATE TRIGGER sqlite_x AFTER INSERT ON x BEGIN SELECT 1; END;", "object name reserved for internal use: sqlite_x", ); + parse_cmd(b"CREATE TABLE sqlite(a)"); + parse_cmd(b"CREATE INDEX \"\" ON t(a)"); } fn expect_parser_err_msg(input: &[u8], error_msg: &str) { diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index d0f5bde..ce5e689 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -1116,7 +1116,7 @@ impl Name { debug_assert!(bytes.len() > 1); debug_assert_eq!(quote, bytes[bytes.len() - 1]); let sub = &self.0.as_str()[1..bytes.len() - 1]; - if quote == b']' { + if quote == b']' || sub.len() < 2 { return QuotedIterator(sub.bytes(), 0); // no escape } QuotedIterator(sub.bytes(), quote) @@ -1125,9 +1125,9 @@ impl Name { fn is_reserved(&self) -> bool { let bytes = self.as_bytes(); let reserved = QuotedIterator("sqlite_".bytes(), 0); - bytes - .zip(reserved) - .all(|(b1, b2)| b1.eq_ignore_ascii_case(&b2)) + bytes.zip(reserved).fold(0u8, |acc, (b1, b2)| { + acc + if b1.eq_ignore_ascii_case(&b2) { 1 } else { 0 } + }) == 7u8 } } From 25dc5f27af9d0ce6d1937ae1b1e8746644ee2f5b Mon Sep 17 00:00:00 2001 From: gwenn Date: Mon, 20 May 2024 18:16:58 +0200 Subject: [PATCH 14/22] Ignore quotes while checking column type --- src/lexer/sql/test.rs | 6 ++++++ src/parser/ast/check.rs | 13 ++++++++++++ src/parser/ast/mod.rs | 44 +++++++++++++++++++++++------------------ 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index 48394b4..25f8563 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -72,6 +72,7 @@ fn create_table_without_column() { #[test] fn auto_increment() { parse_cmd(b"CREATE TABLE t (x INTEGER PRIMARY KEY AUTOINCREMENT)"); + parse_cmd(b"CREATE TABLE t (x \"INTEGER\" PRIMARY KEY AUTOINCREMENT)"); expect_parser_err_msg( b"CREATE TABLE t (x TEXT PRIMARY KEY AUTOINCREMENT)", "AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY", @@ -235,6 +236,11 @@ fn create_strict_table_unknown_datatype() { b"CREATE TABLE t (c1 BOOL) STRICT", "unknown datatype for t.c1: \"BOOL\"", ); + expect_parser_err_msg( + b"CREATE TABLE t (c1 INT(10)) STRICT", + "unknown datatype for t.c1: \"INT(...)\"", + ); + parse_cmd(b"CREATE TABLE t(c1 \"INT\", c2 [TEXT], c3 `INTEGER`)"); } #[test] diff --git a/src/parser/ast/check.rs b/src/parser/ast/check.rs index d7e5a26..8c92a4d 100644 --- a/src/parser/ast/check.rs +++ b/src/parser/ast/check.rs @@ -212,7 +212,20 @@ impl CreateTableBody { if flags.contains(TabFlags::Strict) { for c in columns.values() { match &c.col_type { + Some(Type { + name, + size: Some(_), + }) => { + return Err(custom_err!( + "unknown datatype for {}.{}: \"{}(...)\"", + tbl_name, + c.col_name, + unquote(name).0, + )); + } + // FIXME unquote Some(Type { name, .. }) => { + let name = unquote(name).0; // The datatype must be one of following: INT INTEGER REAL TEXT BLOB ANY if !(name.eq_ignore_ascii_case("INT") || name.eq_ignore_ascii_case("INTEGER") diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index ce5e689..64b69ab 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -1096,6 +1096,27 @@ impl Id { #[derive(Clone, Debug, Eq)] pub struct Name(pub String); // TODO distinction between Name and "Name"/[Name]/`Name` +pub(crate) fn unquote(s: &str) -> (&str, u8) { + if s.is_empty() { + return (s, 0); + } + let bytes = s.as_bytes(); + let mut quote = bytes[0]; + if quote != b'"' && quote != b'`' && quote != b'\'' && quote != b'[' { + return (s, 0); + } else if quote == b'[' { + quote = b']'; + } + debug_assert!(bytes.len() > 1); + debug_assert_eq!(quote, bytes[bytes.len() - 1]); + let sub = &s[1..bytes.len() - 1]; + if quote == b']' || sub.len() < 2 { + (sub, 0) + } else { + (sub, quote) + } +} + impl Name { /// Constructor pub fn from_token(ty: YYCODETYPE, token: Token) -> Name { @@ -1103,22 +1124,7 @@ impl Name { } fn as_bytes(&self) -> QuotedIterator<'_> { - if self.0.is_empty() { - return QuotedIterator(self.0.bytes(), 0); - } - let bytes = self.0.as_bytes(); - let mut quote = bytes[0]; - if quote != b'"' && quote != b'`' && quote != b'\'' && quote != b'[' { - return QuotedIterator(self.0.bytes(), 0); - } else if quote == b'[' { - quote = b']'; - } - debug_assert!(bytes.len() > 1); - debug_assert_eq!(quote, bytes[bytes.len() - 1]); - let sub = &self.0.as_str()[1..bytes.len() - 1]; - if quote == b']' || sub.len() < 2 { - return QuotedIterator(sub.bytes(), 0); // no escape - } + let (sub, quote) = unquote(self.0.as_str()); QuotedIterator(sub.bytes(), quote) } @@ -1458,9 +1464,9 @@ impl ColumnDefinition { } ColumnConstraint::PrimaryKey { auto_increment, .. } => { if *auto_increment - && col_type - .as_ref() - .map_or(true, |t| !t.name.eq_ignore_ascii_case("INTEGER")) + && col_type.as_ref().map_or(true, |t| { + !unquote(t.name.as_str()).0.eq_ignore_ascii_case("INTEGER") + }) { return Err(custom_err!( "AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY" From f561577a2cfb506c13ed4618be15410ad6458939 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 30 Jun 2024 14:51:00 +0200 Subject: [PATCH 15/22] Check unsupported use of NULLS --- src/lexer/sql/test.rs | 21 ++++++++++++ src/parser/ast/mod.rs | 78 ++++++++++++++++++++++++++++++++++++++++++- src/parser/parse.y | 11 +++--- 3 files changed, 103 insertions(+), 7 deletions(-) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index 25f8563..fea058e 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -91,6 +91,27 @@ fn generated() { ) } +#[test] +fn has_explicit_nulls() { + expect_parser_err_msg( + b"CREATE TABLE x(a TEXT, PRIMARY KEY (a ASC NULLS FIRST))", + "unsupported use of NULLS FIRST", + ); + expect_parser_err_msg( + b"CREATE TABLE x(a TEXT, UNIQUE (a ASC NULLS LAST))", + "unsupported use of NULLS LAST", + ); + expect_parser_err_msg( + b"INSERT INTO x VALUES('v') + ON CONFLICT (a DESC NULLS FIRST) DO UPDATE SET a = a+1", + "unsupported use of NULLS FIRST", + ); + expect_parser_err_msg( + b"CREATE INDEX i ON x(a ASC NULLS LAST)", + "unsupported use of NULLS LAST", + ) +} + #[test] fn vtab_args() -> Result<(), Error> { let sql = b"CREATE VIRTUAL TABLE mail USING fts3( diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 64b69ab..3154d3e 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -267,7 +267,26 @@ pub enum Stmt { } impl Stmt { - /// constructor + /// CREATE INDEX constructor + pub fn create_index( + unique: bool, + if_not_exists: bool, + idx_name: QualifiedName, + tbl_name: Name, + columns: Vec, + where_clause: Option, + ) -> Result { + has_explicit_nulls(&columns)?; + Ok(Stmt::CreateIndex { + unique, + if_not_exists, + idx_name, + tbl_name, + columns, + where_clause, + }) + } + /// UPDATE constructor #[allow(clippy::too_many_arguments)] pub fn update( with: Option, @@ -1636,6 +1655,33 @@ pub enum TableConstraint { }, } +impl TableConstraint { + /// PK constructor + pub fn primary_key( + columns: Vec, + auto_increment: bool, + conflict_clause: Option, + ) -> Result { + has_explicit_nulls(&columns)?; + Ok(TableConstraint::PrimaryKey { + columns, + auto_increment, + conflict_clause, + }) + } + /// UNIQUE constructor + pub fn unique( + columns: Vec, + conflict_clause: Option, + ) -> Result { + has_explicit_nulls(&columns)?; + Ok(TableConstraint::Unique { + columns, + conflict_clause, + }) + } +} + /// Sort orders #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum SortOrder { @@ -1744,6 +1790,22 @@ pub struct SortedColumn { pub nulls: Option, } +fn has_explicit_nulls(columns: &Vec) -> Result<(), ParserError> { + for column in columns { + if let Some(ref nulls) = column.nulls { + return Err(custom_err!( + "unsupported use of NULLS {}", + if *nulls == NullsOrder::First { + "FIRST" + } else { + "LAST" + } + )); + } + } + Ok(()) +} + /// `LIMIT` #[derive(Clone, Debug, PartialEq, Eq)] pub struct Limit { @@ -1999,6 +2061,20 @@ pub struct UpsertIndex { pub where_clause: Option, } +impl UpsertIndex { + /// constructor + pub fn new( + targets: Vec, + where_clause: Option, + ) -> Result { + has_explicit_nulls(&targets)?; + Ok(UpsertIndex { + targets, + where_clause, + }) + } +} + /// Upsert `DO` action #[derive(Clone, Debug, PartialEq, Eq)] pub enum UpsertDo { diff --git a/src/parser/parse.y b/src/parser/parse.y index bf29c6c..3d9d9d0 100644 --- a/src/parser/parse.y +++ b/src/parser/parse.y @@ -428,12 +428,12 @@ tconscomma ::= . tcons ::= CONSTRAINT nm(X). { self.ctx.constraint_name = Some(X)} tcons(A) ::= PRIMARY KEY LP sortlist(X) autoinc(I) RP onconf(R). { let name = self.ctx.constraint_name(); - let constraint = TableConstraint::PrimaryKey{ columns: X, auto_increment: I, conflict_clause: R }; + let constraint = TableConstraint::primary_key(X, I, R)?; A = NamedTableConstraint{ name, constraint }; } tcons(A) ::= UNIQUE LP sortlist(X) RP onconf(R). { let name = self.ctx.constraint_name(); - let constraint = TableConstraint::Unique{ columns: X, conflict_clause: R }; + let constraint = TableConstraint::unique(X, R)?; A = NamedTableConstraint{ name, constraint }; } tcons(A) ::= CHECK LP expr(E) RP onconf. { @@ -851,12 +851,12 @@ upsert(A) ::= . { A = (None, None); } upsert(A) ::= RETURNING selcollist(X). { A = (None, Some(X)); } upsert(A) ::= ON CONFLICT LP sortlist(T) RP where_opt(TW) DO UPDATE SET setlist(Z) where_opt(W) upsert(N). - { let index = UpsertIndex{ targets: T, where_clause: TW }; + { let index = UpsertIndex::new(T, TW)?; let do_clause = UpsertDo::Set{ sets: Z, where_clause: W }; let (next, returning) = N; A = (Some(Upsert{ index: Some(index), do_clause, next: next.map(Box::new) }), returning);} upsert(A) ::= ON CONFLICT LP sortlist(T) RP where_opt(TW) DO NOTHING upsert(N). - { let index = UpsertIndex{ targets: T, where_clause: TW }; + { let index = UpsertIndex::new(T, TW)?; let (next, returning) = N; A = (Some(Upsert{ index: Some(index), do_clause: UpsertDo::Nothing, next: next.map(Box::new) }), returning); } upsert(A) ::= ON CONFLICT DO NOTHING returning(R). @@ -1080,8 +1080,7 @@ paren_exprlist(A) ::= LP exprlist(X) RP. {A = X;} // cmd ::= createkw uniqueflag(U) INDEX ifnotexists(NE) fullname(X) ON nm(Y) LP sortlist(Z) RP where_opt(W). { - self.ctx.stmt = Some(Stmt::CreateIndex { unique: U, if_not_exists: NE, idx_name: X, - tbl_name: Y, columns: Z, where_clause: W }); + self.ctx.stmt = Some(Stmt::create_index(U, NE, X, Y, Z, W)?); } %type uniqueflag {bool} From 2bca8e4562c357ee18af064639287e23420dd9cc Mon Sep 17 00:00:00 2001 From: gwenn Date: Wed, 3 Jul 2024 16:02:27 +0200 Subject: [PATCH 16/22] Check table has more than one primary key --- checks.md | 9 --------- src/lexer/sql/test.rs | 16 ++++++++++++++++ src/parser/ast/check.rs | 26 +------------------------- src/parser/ast/mod.rs | 18 +++++++++++++++++- 4 files changed, 34 insertions(+), 35 deletions(-) diff --git a/checks.md b/checks.md index f95ac75..e304ac8 100644 --- a/checks.md +++ b/checks.md @@ -62,15 +62,6 @@ sqlite> CREATE TABLE test (a, b, FOREIGN KEY (b) REFERENCES test(a,b)); Parse error: number of columns in foreign key does not match the number of columns in the referenced table ``` -```sql -sqlite> create table test (a,b, primary key(a), primary key(b)); -Parse error: table "test" has more than one primary key -sqlite> create table test (a primary key, b primary key); -Parse error: table "test" has more than one primary key -sqlite> create table test (a primary key, b, primary key(a)); -Parse error: table "test" has more than one primary key -``` - ### `HAVING` - [x] HAVING clause on a non-aggregate query (`GroupBy::having`): grammar already prevents this case (grammar differs from SQLite official grammar). diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index fea058e..1025168 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -91,6 +91,22 @@ fn generated() { ) } +#[test] +fn more_than_one_pk() { + expect_parser_err_msg( + b"CREATE TABLE test (a,b, PRIMARY KEY(a), PRIMARY KEY(b))", + "table has more than one primary key", + ); + expect_parser_err_msg( + b"CREATE TABLE test (a PRIMARY KEY, b PRIMARY KEY)", + "table has more than one primary key", + ); + expect_parser_err_msg( + b"CREATE TABLE test (a PRIMARY KEY, b, PRIMARY KEY(a))", + "table has more than one primary key", + ); +} + #[test] fn has_explicit_nulls() { expect_parser_err_msg( diff --git a/src/parser/ast/check.rs b/src/parser/ast/check.rs index 8c92a4d..73bb0ed 100644 --- a/src/parser/ast/check.rs +++ b/src/parser/ast/check.rs @@ -253,36 +253,12 @@ impl CreateTableBody { } } } - if flags.contains(TabFlags::WithoutRowid) && !self.has_primary_key() { + if flags.contains(TabFlags::WithoutRowid) && !flags.contains(TabFlags::HasPrimaryKey) { return Err(custom_err!("PRIMARY KEY missing on table {}", tbl_name,)); } } Ok(()) } - - /// explicit primary key constraint ? - pub fn has_primary_key(&self) -> bool { - if let CreateTableBody::ColumnsAndConstraints { - columns, - constraints, - .. - } = self - { - for col in columns.values() { - if col.flags.contains(ColFlags::PRIMKEY) { - return true; - } - } - if let Some(constraints) = constraints { - for c in constraints { - if let TableConstraint::PrimaryKey { .. } = c.constraint { - return true; - } - } - } - } - false - } } impl<'a> IntoIterator for &'a ColumnDefinition { diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 3154d3e..2799f78 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -1389,6 +1389,22 @@ impl CreateTableBody { flags |= TabFlags::HasPrimaryKey; } } + if let Some(ref constraints) = constraints { + for c in constraints { + if let NamedTableConstraint { + constraint: TableConstraint::PrimaryKey { .. }, + .. + } = c + { + if flags.contains(TabFlags::HasPrimaryKey) { + // FIXME table name + return Err(custom_err!("table has more than one primary key")); + } else { + flags |= TabFlags::HasPrimaryKey; + } + } + } + } Ok(CreateTableBody::ColumnsAndConstraints { columns, constraints, @@ -1533,7 +1549,7 @@ impl ColumnDefinition { flags, }) } - /// Constructor + /// Collector pub fn add_column( columns: &mut IndexMap, cd: ColumnDefinition, From c5ea19505c7546d29a87a2243be1555563ac9a0d Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 4 Aug 2024 13:24:24 +0200 Subject: [PATCH 17/22] Fix sql_cmds example Continue parsing input file after an error only if YYNOERRORRECOVERY is disabled --- examples/sql_cmds.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/sql_cmds.rs b/examples/sql_cmds.rs index eab763a..e4da717 100644 --- a/examples/sql_cmds.rs +++ b/examples/sql_cmds.rs @@ -3,7 +3,9 @@ use std::env; use std::fs::read; use std::panic; -use sqlite3_parser::lexer::sql::{Error, Parser}; +#[cfg(not(feature = "YYNOERRORRECOVERY"))] +use sqlite3_parser::lexer::sql::Error; +use sqlite3_parser::lexer::sql::Parser; /// Parse specified files and print all commands. fn main() { @@ -19,6 +21,9 @@ fn main() { Ok(None) => break, Err(err) => { eprintln!("Err: {err} in {arg}"); + #[cfg(feature = "YYNOERRORRECOVERY")] + break; + #[cfg(not(feature = "YYNOERRORRECOVERY"))] if let Error::ParserError(..) = err { } else { break; From b5f8d16af2f9c7b5bc98778c2eadfe1ffd344984 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 20 Oct 2024 12:50:38 +0200 Subject: [PATCH 18/22] Compute (line, column) position only on error When an error occurs in the parser, input has already been consumed / shifted by the lexer. But we want the start position, not the end. --- examples/sql_check.rs | 4 +-- src/lexer/scan.rs | 75 +++++++++++++++++++++++------------------- src/lexer/sql/error.rs | 64 +++++++++++++++++------------------ src/lexer/sql/mod.rs | 35 +++++++++++--------- 4 files changed, 96 insertions(+), 82 deletions(-) diff --git a/examples/sql_check.rs b/examples/sql_check.rs index 1e7b05c..3fa5a67 100644 --- a/examples/sql_check.rs +++ b/examples/sql_check.rs @@ -29,14 +29,14 @@ fn main() { eprintln!( "Check Err in {}:{}, {} in\n{}\n{:?}", arg, - parser.line(), + parser.position(), err, input, cmd ); } Ok(None) => { - eprintln!("Check Err in {}:{}, {:?}", arg, parser.line(), cmd); + eprintln!("Check Err in {}:{}, {:?}", arg, parser.position(), cmd); } Ok(Some(check)) => { if cmd != check { diff --git a/src/lexer/scan.rs b/src/lexer/scan.rs index d38c6f8..ee3832c 100644 --- a/src/lexer/scan.rs +++ b/src/lexer/scan.rs @@ -6,10 +6,40 @@ use std::error::Error; use std::fmt; use std::io; +/// Position +#[derive(Debug)] +pub struct Pos { + /// line number + pub line: usize, + /// column number (byte offset, not char offset) + pub column: usize, +} + +impl Pos { + pub fn from(input: &[u8], offset: usize) -> Self { + let (mut line, mut column) = (1, 1); + for byte in &input[..offset] { + if *byte == b'\n' { + line += 1; + column = 1; + } else { + column += 1; + } + } + Self { line, column } + } +} + +impl fmt::Display for Pos { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "line: {}, column: {}", self.line, self.column) + } +} + /// Error with position pub trait ScanError: Error + From + Sized { /// Update the position where the error occurs - fn position(&mut self, line: u64, column: usize); + fn position(&mut self, p: Pos); } /// The `(&[u8], TokenType)` is the token. @@ -23,7 +53,7 @@ pub trait Splitter: Sized { type Error: ScanError; //type Item: ?Sized; /// Token generated - type TokenType; + type TokenType: std::fmt::Debug; /// The arguments are an initial substring of the remaining unprocessed /// data. @@ -49,13 +79,9 @@ pub struct Scanner { /// offset in `input` offset: usize, /// mark - mark: (usize, u64, usize), + mark: usize, /// The function to tokenize the input. splitter: S, - /// current line number - line: u64, - /// current column number (byte offset, not char offset) - column: usize, } impl Scanner { @@ -63,40 +89,32 @@ impl Scanner { pub fn new(splitter: S) -> Self { Self { offset: 0, - mark: (0, 0, 0), + mark: 0, splitter, - line: 1, - column: 1, } } - /// Current line number - pub fn line(&self) -> u64 { - self.line + /// Current position + pub fn position(&self, input: &[u8]) -> Pos { + Pos::from(input, self.offset) } - /// Current column number (byte offset, not char offset) - pub fn column(&self) -> usize { - self.column - } /// Associated splitter pub fn splitter(&self) -> &S { &self.splitter } /// Mark current position pub fn mark(&mut self) { - self.mark = (self.offset, self.line, self.column); + self.mark = self.offset; } /// Reset to mark pub fn reset_to_mark(&mut self) { - (self.offset, self.line, self.column) = self.mark; + self.offset = self.mark; } /// Reset the scanner such that it behaves as if it had never been used. pub fn reset(&mut self) { self.offset = 0; - self.line = 1; - self.column = 1; } } @@ -112,7 +130,7 @@ impl Scanner { &mut self, input: &'input [u8], ) -> ScanResult<'input, S::TokenType, S::Error> { - debug!(target: "scanner", "scan(line: {}, column: {})", self.line, self.column); + debug!(target: "scanner", "scan({})", Pos::from(input, self.offset)); // Loop until we have a token. loop { // See if we can get a token with what we already have. @@ -120,7 +138,7 @@ impl Scanner { let data = &input[self.offset..]; match self.splitter.split(data) { Err(mut e) => { - e.position(self.line, self.column); + e.position(Pos::from(input, self.offset)); return Err(e); } Ok((None, 0)) => { @@ -134,6 +152,7 @@ impl Scanner { Ok((tok, amt)) => { let start = self.offset; self.consume(data, amt); + debug!(target: "scanner", "scan(start: {}, tok: {:?}, offset: {})", start, tok, self.offset); return Ok((start, tok, self.offset)); } } @@ -148,14 +167,6 @@ impl Scanner { fn consume(&mut self, data: &[u8], amt: usize) { debug!(target: "scanner", "consume({})", amt); debug_assert!(amt <= data.len()); - for byte in &data[..amt] { - if *byte == b'\n' { - self.line += 1; - self.column = 1; - } else { - self.column += 1; - } - } self.offset += amt; } } @@ -165,8 +176,6 @@ impl fmt::Debug for Scanner { f.debug_struct("Scanner") .field("offset", &self.offset) .field("mark", &self.mark) - .field("line", &self.line) - .field("column", &self.column) .finish() } } diff --git a/src/lexer/sql/error.rs b/src/lexer/sql/error.rs index 2ccc1a3..3663dee 100644 --- a/src/lexer/sql/error.rs +++ b/src/lexer/sql/error.rs @@ -2,7 +2,7 @@ use std::error; use std::fmt; use std::io; -use crate::lexer::scan::ScanError; +use crate::lexer::scan::{Pos, ScanError}; use crate::parser::ParserError; /// SQL lexer and parser errors @@ -12,49 +12,49 @@ pub enum Error { /// I/O Error Io(io::Error), /// Lexer error - UnrecognizedToken(Option<(u64, usize)>), + UnrecognizedToken(Option), /// Missing quote or double-quote or backtick - UnterminatedLiteral(Option<(u64, usize)>), + UnterminatedLiteral(Option), /// Missing `]` - UnterminatedBracket(Option<(u64, usize)>), + UnterminatedBracket(Option), /// Missing `*/` - UnterminatedBlockComment(Option<(u64, usize)>), + UnterminatedBlockComment(Option), /// Invalid parameter name - BadVariableName(Option<(u64, usize)>), + BadVariableName(Option), /// Invalid number format - BadNumber(Option<(u64, usize)>), + BadNumber(Option), /// Invalid or missing sign after `!` - ExpectedEqualsSign(Option<(u64, usize)>), + ExpectedEqualsSign(Option), /// BLOB literals are string literals containing hexadecimal data and preceded by a single "x" or "X" character. - MalformedBlobLiteral(Option<(u64, usize)>), + MalformedBlobLiteral(Option), /// Hexadecimal integer literals follow the C-language notation of "0x" or "0X" followed by hexadecimal digits. - MalformedHexInteger(Option<(u64, usize)>), + MalformedHexInteger(Option), /// Grammar error - ParserError(ParserError, Option<(u64, usize)>), + ParserError(ParserError, Option), } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self { + match self { Self::Io(ref err) => err.fmt(f), - Self::UnrecognizedToken(pos) => write!(f, "unrecognized token at {:?}", pos.unwrap()), + Self::UnrecognizedToken(pos) => write!(f, "unrecognized token at {:?}", pos), Self::UnterminatedLiteral(pos) => { - write!(f, "non-terminated literal at {:?}", pos.unwrap()) + write!(f, "non-terminated literal at {:?}", pos) } Self::UnterminatedBracket(pos) => { - write!(f, "non-terminated bracket at {:?}", pos.unwrap()) + write!(f, "non-terminated bracket at {:?}", pos) } Self::UnterminatedBlockComment(pos) => { - write!(f, "non-terminated block comment at {:?}", pos.unwrap()) + write!(f, "non-terminated block comment at {:?}", pos) } - Self::BadVariableName(pos) => write!(f, "bad variable name at {:?}", pos.unwrap()), - Self::BadNumber(pos) => write!(f, "bad number at {:?}", pos.unwrap()), - Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {:?}", pos.unwrap()), + Self::BadVariableName(pos) => write!(f, "bad variable name at {:?}", pos), + Self::BadNumber(pos) => write!(f, "bad number at {:?}", pos), + Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {:?}", pos), Self::MalformedBlobLiteral(pos) => { - write!(f, "malformed blob literal at {:?}", pos.unwrap()) + write!(f, "malformed blob literal at {:?}", pos) } Self::MalformedHexInteger(pos) => { - write!(f, "malformed hex integer at {:?}", pos.unwrap()) + write!(f, "malformed hex integer at {:?}", pos) } Self::ParserError(ref msg, Some(pos)) => write!(f, "{msg} at {pos:?}"), Self::ParserError(ref msg, _) => write!(f, "{msg}"), @@ -77,19 +77,19 @@ impl From for Error { } impl ScanError for Error { - fn position(&mut self, line: u64, column: usize) { + fn position(&mut self, p: Pos) { match *self { Self::Io(_) => {} - Self::UnrecognizedToken(ref mut pos) => *pos = Some((line, column)), - Self::UnterminatedLiteral(ref mut pos) => *pos = Some((line, column)), - Self::UnterminatedBracket(ref mut pos) => *pos = Some((line, column)), - Self::UnterminatedBlockComment(ref mut pos) => *pos = Some((line, column)), - Self::BadVariableName(ref mut pos) => *pos = Some((line, column)), - Self::BadNumber(ref mut pos) => *pos = Some((line, column)), - Self::ExpectedEqualsSign(ref mut pos) => *pos = Some((line, column)), - Self::MalformedBlobLiteral(ref mut pos) => *pos = Some((line, column)), - Self::MalformedHexInteger(ref mut pos) => *pos = Some((line, column)), - Self::ParserError(_, ref mut pos) => *pos = Some((line, column)), + Self::UnrecognizedToken(ref mut pos) => *pos = Some(p), + Self::UnterminatedLiteral(ref mut pos) => *pos = Some(p), + Self::UnterminatedBracket(ref mut pos) => *pos = Some(p), + Self::UnterminatedBlockComment(ref mut pos) => *pos = Some(p), + Self::BadVariableName(ref mut pos) => *pos = Some(p), + Self::BadNumber(ref mut pos) => *pos = Some(p), + Self::ExpectedEqualsSign(ref mut pos) => *pos = Some(p), + Self::MalformedBlobLiteral(ref mut pos) => *pos = Some(p), + Self::MalformedHexInteger(ref mut pos) => *pos = Some(p), + Self::ParserError(_, ref mut pos) => *pos = Some(p), } } } diff --git a/src/lexer/sql/mod.rs b/src/lexer/sql/mod.rs index 1adef9b..c7b6ce2 100644 --- a/src/lexer/sql/mod.rs +++ b/src/lexer/sql/mod.rs @@ -49,13 +49,9 @@ impl<'input> Parser<'input> { self.input = input; self.scanner.reset(); } - /// Current line position in input - pub fn line(&self) -> u64 { - self.scanner.line() - } - /// Current column position in input - pub fn column(&self) -> usize { - self.scanner.column() + /// Current position in input + pub fn position(&self) -> Pos { + self.scanner.position(self.input) } } @@ -153,12 +149,12 @@ fn analyze_filter_keyword( } macro_rules! try_with_position { - ($scanner:expr, $expr:expr) => { + ($input:expr, $offset:expr, $expr:expr) => { match $expr { Ok(val) => val, Err(err) => { let mut err = Error::from(err); - err.position($scanner.line(), $scanner.column()); + err.position(Pos::from($input, $offset)); return Err(err); } } @@ -173,10 +169,12 @@ impl<'input> FallibleIterator for Parser<'input> { //print!("line: {}, column: {}: ", self.scanner.line(), self.scanner.column()); self.parser.ctx.reset(); let mut last_token_parsed = TK_EOF; + let offset; let mut eof = false; loop { let (start, (value, mut token_type), end) = match self.scanner.scan(self.input)? { - (_, None, _) => { + (start, None, _) => { + offset = start; eof = true; break; } @@ -202,10 +200,15 @@ impl<'input> FallibleIterator for Parser<'input> { token_type.to_token(start, value, end) }; //println!("({:?}, {:?})", token_type, token); - try_with_position!(self.scanner, self.parser.sqlite3Parser(token_type, token)); + try_with_position!( + self.input, + start, + self.parser.sqlite3Parser(token_type, token) + ); last_token_parsed = token_type; if self.parser.ctx.done() { //println!(); + offset = start; break; } } @@ -217,26 +220,28 @@ impl<'input> FallibleIterator for Parser<'input> { if eof && self.parser.ctx.is_ok() { if last_token_parsed != TK_SEMI { try_with_position!( - self.scanner, + self.input, + offset, self.parser .sqlite3Parser(TK_SEMI, sentinel(self.input.len())) ); } try_with_position!( - self.scanner, + self.input, + offset, self.parser .sqlite3Parser(TK_EOF, sentinel(self.input.len())) ); } self.parser.sqlite3ParserFinalize(); if let Some(e) = self.parser.ctx.error() { - let err = Error::ParserError(e, Some((self.scanner.line(), self.scanner.column()))); + let err = Error::ParserError(e, Some(Pos::from(self.input, offset))); return Err(err); } let cmd = self.parser.ctx.cmd(); if let Some(ref cmd) = cmd { if let Err(e) = cmd.check() { - let err = Error::ParserError(e, Some((self.scanner.line(), self.scanner.column()))); + let err = Error::ParserError(e, Some(Pos::from(self.input, offset))); return Err(err); } } From 013aa7cd4a66b426c0f5d0865f5434928c99b095 Mon Sep 17 00:00:00 2001 From: gwenn Date: Sun, 20 Oct 2024 18:56:28 +0200 Subject: [PATCH 19/22] Oops --- src/lexer/sql/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lexer/sql/mod.rs b/src/lexer/sql/mod.rs index c7b6ce2..20cb0ad 100644 --- a/src/lexer/sql/mod.rs +++ b/src/lexer/sql/mod.rs @@ -15,8 +15,7 @@ mod error; #[cfg(test)] mod test; -use crate::lexer::scan::ScanError; -use crate::lexer::scan::Splitter; +use crate::lexer::scan::{Pos, ScanError, Splitter}; use crate::lexer::Scanner; pub use crate::parser::ParserError; pub use error::Error; From 24ce015e72e4765bcded3abd05b5b245d0270059 Mon Sep 17 00:00:00 2001 From: gwenn Date: Fri, 1 Nov 2024 17:13:17 +0100 Subject: [PATCH 20/22] Fix clippy warnings --- src/lexer/sql/error.rs | 18 +++++++++--------- src/parser/ast/mod.rs | 36 ++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/lexer/sql/error.rs b/src/lexer/sql/error.rs index 3663dee..56cd05f 100644 --- a/src/lexer/sql/error.rs +++ b/src/lexer/sql/error.rs @@ -37,24 +37,24 @@ impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Io(ref err) => err.fmt(f), - Self::UnrecognizedToken(pos) => write!(f, "unrecognized token at {:?}", pos), + Self::UnrecognizedToken(pos) => write!(f, "unrecognized token at {pos:?}"), Self::UnterminatedLiteral(pos) => { - write!(f, "non-terminated literal at {:?}", pos) + write!(f, "non-terminated literal at {pos:?}") } Self::UnterminatedBracket(pos) => { - write!(f, "non-terminated bracket at {:?}", pos) + write!(f, "non-terminated bracket at {pos:?}") } Self::UnterminatedBlockComment(pos) => { - write!(f, "non-terminated block comment at {:?}", pos) + write!(f, "non-terminated block comment at {pos:?}") } - Self::BadVariableName(pos) => write!(f, "bad variable name at {:?}", pos), - Self::BadNumber(pos) => write!(f, "bad number at {:?}", pos), - Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {:?}", pos), + Self::BadVariableName(pos) => write!(f, "bad variable name at {pos:?}"), + Self::BadNumber(pos) => write!(f, "bad number at {pos:?}"), + Self::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {pos:?}"), Self::MalformedBlobLiteral(pos) => { - write!(f, "malformed blob literal at {:?}", pos) + write!(f, "malformed blob literal at {pos:?}") } Self::MalformedHexInteger(pos) => { - write!(f, "malformed hex integer at {:?}", pos) + write!(f, "malformed hex integer at {pos:?}") } Self::ParserError(ref msg, Some(pos)) => write!(f, "{msg} at {pos:?}"), Self::ParserError(ref msg, _) => write!(f, "{msg}"), diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index f2bc8d2..85f793f 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -275,9 +275,9 @@ impl Stmt { tbl_name: Name, columns: Vec, where_clause: Option, - ) -> Result { + ) -> Result { has_explicit_nulls(&columns)?; - Ok(Stmt::CreateIndex { + Ok(Self::CreateIndex { unique, if_not_exists, idx_name, @@ -299,7 +299,7 @@ impl Stmt { returning: Option>, order_by: Option>, limit: Option, - ) -> Result { + ) -> Result { if let Some(FromClause { select: Some(ref select), ref joins, @@ -321,7 +321,7 @@ impl Stmt { if order_by.is_some() && limit.is_none() { return Err(custom_err!("ORDER BY without LIMIT on UPDATE")); } - Ok(Stmt::Update { + Ok(Self::Update { with, or_conflict, tbl_name, @@ -560,18 +560,18 @@ impl Expr { xt: YYCODETYPE, x: Token, distinctness: Option, - args: Option>, + args: Option>, order_by: Option>, filter_over: Option, - ) -> Result { + ) -> Result { if let Some(Distinctness::Distinct) = distinctness { - if args.as_ref().map_or(0, |v| v.len()) != 1 { + if args.as_ref().map_or(0, Vec::len) != 1 { return Err(custom_err!( "DISTINCT aggregates must have exactly one argument" )); } } - Ok(Expr::FunctionCall { + Ok(Self::FunctionCall { name: Id::from_token(xt, x), distinctness, args, @@ -854,7 +854,7 @@ impl OneSelect { where_clause: Option, group_by: Option, window_clause: Option>, - ) -> Result { + ) -> Result { if from.is_none() && columns .iter() @@ -862,7 +862,7 @@ impl OneSelect { { return Err(custom_err!("no tables specified")); } - Ok(OneSelect::Select { + Ok(Self::Select { distinctness, columns, from, @@ -1480,7 +1480,7 @@ impl ColumnDefinition { .. } => { // The child table may reference the primary key of the parent without specifying the primary key column - if columns.as_ref().map_or(0, |v| v.len()) > 1 { + if columns.as_ref().map_or(0, Vec::len) > 1 { return Err(custom_err!( "foreign key on {} should reference only one column of table {}", col_name, @@ -1533,7 +1533,7 @@ impl ColumnDefinition { if col_type.as_ref().map_or(false, |t| !t.name.is_empty()) { flags |= ColFlags::HASTYPE; } - Ok(ColumnDefinition { + Ok(Self { col_name, col_type, constraints, @@ -1665,9 +1665,9 @@ impl TableConstraint { columns: Vec, auto_increment: bool, conflict_clause: Option, - ) -> Result { + ) -> Result { has_explicit_nulls(&columns)?; - Ok(TableConstraint::PrimaryKey { + Ok(Self::PrimaryKey { columns, auto_increment, conflict_clause, @@ -1677,9 +1677,9 @@ impl TableConstraint { pub fn unique( columns: Vec, conflict_clause: Option, - ) -> Result { + ) -> Result { has_explicit_nulls(&columns)?; - Ok(TableConstraint::Unique { + Ok(Self::Unique { columns, conflict_clause, }) @@ -2067,9 +2067,9 @@ impl UpsertIndex { pub fn new( targets: Vec, where_clause: Option, - ) -> Result { + ) -> Result { has_explicit_nulls(&targets)?; - Ok(UpsertIndex { + Ok(Self { targets, where_clause, }) From 531b72e7b9ebf846bcc24e0935aa262e852c7d1d Mon Sep 17 00:00:00 2001 From: gwenn Date: Fri, 1 Nov 2024 17:59:20 +0100 Subject: [PATCH 21/22] Check GROUP BY out of range --- checks.md | 6 +++--- src/lexer/sql/test.rs | 12 ++++++++++++ src/parser/ast/mod.rs | 41 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/checks.md b/checks.md index e304ac8..9aa8143 100644 --- a/checks.md +++ b/checks.md @@ -46,11 +46,11 @@ Parse error: must have at least one non-generated column ```sql sqlite> CREATE TABLE t(a REFERENCES o(a,b)); -Parse error: foreign key on a should reference only one column of table o +Parse error: foreign key on a should reference only one column of table o -- done CREATE TABLE t(a REFERENCES o(a,b)); error here ---^ sqlite> CREATE TABLE t(a PRIMARY KEY AUTOINCREMENT) WITHOUT ROWID; -Parse error: AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY +Parse error: AUTOINCREMENT is only allowed on an INTEGER PRIMARY KEY -- done sqlite> CREATE TABLE t(a INTEGER PRIMARY KEY AUTOINCREMENT) WITHOUT ROWID; Parse error: AUTOINCREMENT not allowed on WITHOUT ROWID tables ``` @@ -106,7 +106,7 @@ Parse error: no such column: j ```sql sqlite> CREATE TABLE test (n, m); -sqlite> INSERT INTO test (n, n, m) VALUES (1, 0, 1); -- pgsql KO +sqlite> INSERT INTO test (n, n, m) VALUES (1, 0, 1); -- pgsql KO, done sqlite> SELECT * FROM test; 1|1 sqlite> UPDATE test SET n = 1, n = 0; -- pgsql KO diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index 312fc1a..f7a8a40 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -426,6 +426,18 @@ fn no_tables_specified() { parse_cmd(b"SELECT count(*)"); } +#[test] +fn group_by_out_of_range() { + expect_parser_err_msg( + b"SELECT a, b FROM x GROUP BY 0", + "GROUP BY term out of range - should be between 1 and 2", + ); + expect_parser_err_msg( + b"SELECT a, b FROM x GROUP BY 3", + "GROUP BY term out of range - should be between 1 and 2", + ); +} + #[test] fn update_from_target() { expect_parser_err_msg( diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 85f793f..136e3a7 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -7,6 +7,7 @@ use std::num::ParseIntError; use std::ops::Deref; use std::str::{self, Bytes, FromStr}; +use check::ColumnCount; use fmt::{ToTokens, TokenStream}; use indexmap::{IndexMap, IndexSet}; @@ -579,6 +580,30 @@ impl Expr { filter_over, }) } + + /// Check if an expression is an integer + pub fn is_integer(&self) -> Option { + if let Self::Literal(Literal::Numeric(ref s)) = self { + if let Ok(n) = usize::from_str(s) { + Some(n) + } else { + None + } + } else { + None + } + } + fn check_range(&self, mx: usize) -> Result<(), ParserError> { + if let Some(i) = self.is_integer() { + if i < 1 || i > mx { + return Err(custom_err!( + "GROUP BY term out of range - should be between 1 and {}", + mx + )); + } + } + Ok(()) + } } /// SQL literal @@ -862,14 +887,26 @@ impl OneSelect { { return Err(custom_err!("no tables specified")); } - Ok(Self::Select { + let select = Self::Select { distinctness, columns, from, where_clause, group_by, window_clause, - }) + }; + if let Self::Select { + group_by: Some(ref gb), + .. + } = select + { + if let ColumnCount::Fixed(n) = select.column_count() { + for expr in &gb.exprs { + expr.check_range(n)?; + } + } + } + Ok(select) } } From ee93c3f273de759de60181fdf55719128801a34d Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 2 Nov 2024 09:42:46 +0100 Subject: [PATCH 22/22] Check ORDER BY out of range --- src/lexer/sql/test.rs | 16 ++++++++++++ src/parser/ast/check.rs | 9 ++++--- src/parser/ast/mod.rs | 58 ++++++++++++++++++++++++++++++++--------- src/parser/parse.y | 6 ++--- 4 files changed, 70 insertions(+), 19 deletions(-) diff --git a/src/lexer/sql/test.rs b/src/lexer/sql/test.rs index f7a8a40..d0717a7 100644 --- a/src/lexer/sql/test.rs +++ b/src/lexer/sql/test.rs @@ -438,6 +438,22 @@ fn group_by_out_of_range() { ); } +#[test] +fn order_by_out_of_range() { + expect_parser_err_msg( + b"SELECT a, b FROM x ORDER BY -1", + "ORDER BY term out of range - should be between 1 and 2", + ); + expect_parser_err_msg( + b"SELECT a, b FROM x ORDER BY 0", + "ORDER BY term out of range - should be between 1 and 2", + ); + expect_parser_err_msg( + b"SELECT a, b FROM x ORDER BY 3", + "ORDER BY term out of range - should be between 1 and 2", + ); +} + #[test] fn update_from_target() { expect_parser_err_msg( diff --git a/src/parser/ast/check.rs b/src/parser/ast/check.rs index b376058..e829211 100644 --- a/src/parser/ast/check.rs +++ b/src/parser/ast/check.rs @@ -39,7 +39,8 @@ pub enum ColumnCount { /// With `SELECT *` / PRAGMA Dynamic, /// Constant count - Fixed(usize), + // The default setting for SQLITE_MAX_COLUMN is 2000. You can change it at compile time to values as large as 32767. + Fixed(u16), /// No column None, } @@ -146,7 +147,7 @@ impl Stmt { } // SQLite3 engine raises this error later (not while parsing): match select.column_count() { - ColumnCount::Fixed(n) if n != columns.len() => Err(custom_err!( + ColumnCount::Fixed(n) if n as usize != columns.len() => Err(custom_err!( "expected {} columns for {} but got {}", columns.len(), view_name, @@ -165,7 +166,7 @@ impl Stmt { body: InsertBody::Select(select, ..), .. } => match select.body.select.column_count() { - ColumnCount::Fixed(n) if n != columns.len() => { + ColumnCount::Fixed(n) if n as usize != columns.len() => { Err(custom_err!("{} values for {} columns", n, columns.len())) } _ => Ok(()), @@ -287,7 +288,7 @@ impl OneSelect { Self::Select { columns, .. } => column_count(columns), Self::Values(values) => { assert!(!values.is_empty()); // TODO Validate - ColumnCount::Fixed(values[0].len()) + ColumnCount::Fixed(u16::try_from(values[0].len()).unwrap()) } } } diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 136e3a7..d79bc92 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -20,7 +20,7 @@ use crate::parser::{parse::YYCODETYPE, ParserError}; #[derive(Default)] pub struct ParameterInfo { /// Number of SQL parameters in a prepared statement, like `sqlite3_bind_parameter_count` - pub count: u32, + pub count: u16, /// Parameter name(s) if any pub names: IndexSet, } @@ -35,7 +35,7 @@ impl TokenStream for ParameterInfo { if variable == "?" { self.count = self.count.saturating_add(1); } else if variable.as_bytes()[0] == b'?' { - let n = u32::from_str(&variable[1..])?; + let n = u16::from_str(&variable[1..])?; if n > self.count { self.count = n; } @@ -582,22 +582,27 @@ impl Expr { } /// Check if an expression is an integer - pub fn is_integer(&self) -> Option { - if let Self::Literal(Literal::Numeric(ref s)) = self { - if let Ok(n) = usize::from_str(s) { + pub fn is_integer(&self) -> Option { + if let Self::Literal(Literal::Numeric(s)) = self { + if let Ok(n) = i64::from_str(s) { Some(n) } else { None } + } else if let Self::Unary(UnaryOperator::Positive, e) = self { + e.is_integer() + } else if let Self::Unary(UnaryOperator::Negative, e) = self { + e.is_integer().map(i64::saturating_neg) } else { None } } - fn check_range(&self, mx: usize) -> Result<(), ParserError> { + fn check_range(&self, term: &str, mx: u16) -> Result<(), ParserError> { if let Some(i) = self.is_integer() { - if i < 1 || i > mx { + if i < 1 || i > mx as i64 { return Err(custom_err!( - "GROUP BY term out of range - should be between 1 and {}", + "{} BY term out of range - should be between 1 and {}", + term, mx )); } @@ -784,15 +789,44 @@ impl From for UnaryOperator { #[derive(Clone, Debug, PartialEq, Eq)] pub struct Select { /// CTE - pub with: Option, + pub with: Option, // TODO check usages in body /// body pub body: SelectBody, /// `ORDER BY` - pub order_by: Option>, // ORDER BY term does not match any column in the result set + pub order_by: Option>, // TODO: ORDER BY term does not match any column in the result set /// `LIMIT` pub limit: Option, } +impl Select { + /// Constructor + pub fn new( + with: Option, + body: SelectBody, + order_by: Option>, + limit: Option, + ) -> Result { + let select = Self { + with, + body, + order_by, + limit, + }; + if let Self { + order_by: Some(ref scs), + .. + } = select + { + if let ColumnCount::Fixed(n) = select.column_count() { + for sc in scs { + sc.expr.check_range("ORDER", n)?; + } + } + } + Ok(select) + } +} + /// `SELECT` body #[derive(Clone, Debug, PartialEq, Eq)] pub struct SelectBody { @@ -902,7 +936,7 @@ impl OneSelect { { if let ColumnCount::Fixed(n) = select.column_count() { for expr in &gb.exprs { - expr.check_range(n)?; + expr.check_range("GROUP", n)?; } } } @@ -2019,7 +2053,7 @@ impl CommonTableExpr { ) -> Result { if let Some(ref columns) = columns { if let check::ColumnCount::Fixed(cc) = select.column_count() { - if cc != columns.len() { + if cc as usize != columns.len() { return Err(custom_err!( "table {} has {} values for {} columns", tbl_name, diff --git a/src/parser/parse.y b/src/parser/parse.y index 08fd0ce..bda7413 100644 --- a/src/parser/parse.y +++ b/src/parser/parse.y @@ -498,14 +498,14 @@ cmd ::= select(X). { %ifndef SQLITE_OMIT_CTE select(A) ::= WITH wqlist(W) selectnowith(X) orderby_opt(Z) limit_opt(L). { - A = Select{ with: Some(With { recursive: false, ctes: W }), body: X, order_by: Z, limit: L }; + A = Select::new(Some(With { recursive: false, ctes: W }), X, Z, L)?; } select(A) ::= WITH RECURSIVE wqlist(W) selectnowith(X) orderby_opt(Z) limit_opt(L). { - A = Select{ with: Some(With { recursive: true, ctes: W }), body: X, order_by: Z, limit: L }; + A = Select::new(Some(With { recursive: true, ctes: W }), X, Z, L)?; } %endif /* SQLITE_OMIT_CTE */ select(A) ::= selectnowith(X) orderby_opt(Z) limit_opt(L). { - A = Select{ with: None, body: X, order_by: Z, limit: L }; /*A-overwrites-X*/ + A = Select::new(None, X, Z, L)?; /*A-overwrites-X*/ } selectnowith(A) ::= oneselect(X). {