From b685ab5723c02461aea4d664fc6e3c499fde94b5 Mon Sep 17 00:00:00 2001
From: Lucio Franco
Date: Fri, 27 Oct 2023 10:00:54 -0400
Subject: [PATCH 1/2] Vendored sqlite3-parser and remove patch

---
 Cargo.lock | 2 +-
 Cargo.toml | 4 +-
 libsql-server/Cargo.toml | 2 +-
 libsql/Cargo.toml | 3 +-
 .../sqlite3-parser/.github/dependabot.yml | 11 +
 .../sqlite3-parser/.github/workflows/rust.yml | 21 +
 vendored/sqlite3-parser/.gitignore | 6 +
 vendored/sqlite3-parser/CMakeLists.txt | 5 +
 vendored/sqlite3-parser/Cargo.toml | 44 +
 vendored/sqlite3-parser/LICENSE | 24 +
 vendored/sqlite3-parser/README.md | 74 +
 vendored/sqlite3-parser/Sync.md | 7 +
 vendored/sqlite3-parser/benches/keyword.rs | 153 +
 vendored/sqlite3-parser/build.rs | 212 +
 vendored/sqlite3-parser/examples/simple.y | 123 +
 vendored/sqlite3-parser/examples/sql_check.rs | 55 +
 vendored/sqlite3-parser/examples/sql_cmd.rs | 22 +
 vendored/sqlite3-parser/examples/sql_cmds.rs | 34 +
 .../sqlite3-parser/examples/sql_tokens.rs | 91 +
 .../sqlite3-parser/sqlparser_bench/Cargo.toml | 16 +
 .../sqlite3-parser/sqlparser_bench/README.md | 1 +
 .../benches/sqlparser_bench.rs | 48 +
 vendored/sqlite3-parser/src/dialect/mod.rs | 405 ++
 vendored/sqlite3-parser/src/dialect/token.rs | 180 +
 vendored/sqlite3-parser/src/lexer/README.md | 15 +
 vendored/sqlite3-parser/src/lexer/mod.rs | 6 +
 vendored/sqlite3-parser/src/lexer/scan.rs | 166 +
 .../sqlite3-parser/src/lexer/sql/error.rs | 83 +
 vendored/sqlite3-parser/src/lexer/sql/mod.rs | 644 ++
 vendored/sqlite3-parser/src/lexer/sql/test.rs | 123 +
 vendored/sqlite3-parser/src/lib.rs | 5 +
 vendored/sqlite3-parser/src/parser/ast/mod.rs | 3248 +++++++++
 vendored/sqlite3-parser/src/parser/mod.rs | 149 +
 vendored/sqlite3-parser/src/parser/parse.y | 1490 +++++
 .../sqlite3-parser/third_party/lemon/lemon.c | 5784 +++++++++++++++++
 .../third_party/lemon/lempar.rs | 956 +++
 36 files changed, 14205 insertions(+), 7 deletions(-)
 create mode 100644 vendored/sqlite3-parser/.github/dependabot.yml
 create mode 100644 vendored/sqlite3-parser/.github/workflows/rust.yml
 create mode 100644 vendored/sqlite3-parser/.gitignore
 create mode 100644 vendored/sqlite3-parser/CMakeLists.txt
 create mode 100644 vendored/sqlite3-parser/Cargo.toml
 create mode 100644 vendored/sqlite3-parser/LICENSE
 create mode 100644 vendored/sqlite3-parser/README.md
 create mode 100644 vendored/sqlite3-parser/Sync.md
 create mode 100644 vendored/sqlite3-parser/benches/keyword.rs
 create mode 100644 vendored/sqlite3-parser/build.rs
 create mode 100644 vendored/sqlite3-parser/examples/simple.y
 create mode 100644 vendored/sqlite3-parser/examples/sql_check.rs
 create mode 100644 vendored/sqlite3-parser/examples/sql_cmd.rs
 create mode 100644 vendored/sqlite3-parser/examples/sql_cmds.rs
 create mode 100644 vendored/sqlite3-parser/examples/sql_tokens.rs
 create mode 100644 vendored/sqlite3-parser/sqlparser_bench/Cargo.toml
 create mode 100644 vendored/sqlite3-parser/sqlparser_bench/README.md
 create mode 100644 vendored/sqlite3-parser/sqlparser_bench/benches/sqlparser_bench.rs
 create mode 100644 vendored/sqlite3-parser/src/dialect/mod.rs
 create mode 100644 vendored/sqlite3-parser/src/dialect/token.rs
 create mode 100644 vendored/sqlite3-parser/src/lexer/README.md
 create mode 100644 vendored/sqlite3-parser/src/lexer/mod.rs
 create mode 100644 vendored/sqlite3-parser/src/lexer/scan.rs
 create mode 100644 vendored/sqlite3-parser/src/lexer/sql/error.rs
 create mode 100644 vendored/sqlite3-parser/src/lexer/sql/mod.rs
 create mode 100644 vendored/sqlite3-parser/src/lexer/sql/test.rs
 create mode 100644 vendored/sqlite3-parser/src/lib.rs
 create mode 100644 vendored/sqlite3-parser/src/parser/ast/mod.rs
 create mode 100644 vendored/sqlite3-parser/src/parser/mod.rs
 create mode 100644 vendored/sqlite3-parser/src/parser/parse.y
 create mode 100644 vendored/sqlite3-parser/third_party/lemon/lemon.c
 create mode 100644 vendored/sqlite3-parser/third_party/lemon/lempar.rs

diff --git a/Cargo.lock b/Cargo.lock
index 3636241bc8..bf6f9aa87b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3864,10 +3864,10 @@ dependencies = [
 [[package]]
 name = "sqlite3-parser"
 version = "0.11.0"
-source = "git+https://github.com/LucioFranco/lemon-rs#0ffcb2708727014f091c0c686e864d48ad788919"
 dependencies = [
  "bitflags 2.4.1",
  "cc",
+ "env_logger",
  "fallible-iterator 0.3.0",
  "indexmap 2.0.2",
  "log",
diff --git a/Cargo.toml b/Cargo.toml
index 8accb18eee..4f5a8aa1de 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,15 +12,13 @@ members = [
     "libsql-sys-tmp",
     "vendored/rusqlite",
+    "vendored/sqlite3-parser"
 ]

 [profile.release]
 codegen-units = 1
 panic = "abort"

-[patch.crates-io]
-sqlite3-parser = { git = "https://github.com/LucioFranco/lemon-rs" }
-
 [workspace.dependencies]
 rusqlite = { path = "vendored/rusqlite", version = "0.29", default-features = false, features = [
     "libsql-experimental",
diff --git a/libsql-server/Cargo.toml b/libsql-server/Cargo.toml
index 3b83b3ddb5..9e64704b10 100644
--- a/libsql-server/Cargo.toml
+++ b/libsql-server/Cargo.toml
@@ -48,7 +48,7 @@ serde_json = { version = "1.0.91", features = ["preserve_order"] }
 sha2 = "0.10"
 sha256 = "1.1.3"
 sqld-libsql-bindings = { version = "0", path = "../libsql-sys-tmp" }
-sqlite3-parser = { version = "0.11.0", default-features = false, features = [ "YYNOERRORRECOVERY" ] }
+sqlite3-parser = { path = "../vendored/sqlite3-parser", version = "0.11.0", default-features = false, features = [ "YYNOERRORRECOVERY" ] }
 tempfile = "3.3.0"
 thiserror = "1.0.38"
 tokio = { version = "1.22.2", features = ["rt-multi-thread", "net", "io-std", "io-util", "time", "macros", "sync", "fs", "signal"] }
diff --git a/libsql/Cargo.toml b/libsql/Cargo.toml
index fe58e996ae..74b8abeff2 100644
--- a/libsql/Cargo.toml
+++ b/libsql/Cargo.toml
@@ -37,10 +37,9 @@ tower-http = { version = "0.4.4", features = ["trace", "util"], optional = true
 prost = { version = "0.12", optional = true }
 http = { version = "0.2", optional = true }

-sqlite3-parser = { version = "0.11", optional = true }
+sqlite3-parser = { path = "../vendored/sqlite3-parser", version = "0.11", optional = true }
 fallible-iterator = { version = "0.3", optional = true }
-
 [dev-dependencies]
 arbitrary = { version = "1.3.0", features = ["derive_arbitrary"] }
 criterion = { version = "0.5", features = ["html_reports", "async", "async_futures", "async_tokio"] }
diff --git a/vendored/sqlite3-parser/.github/dependabot.yml b/vendored/sqlite3-parser/.github/dependabot.yml
new file mode 100644
index 0000000000..e8d486ab3f
--- /dev/null
+++ b/vendored/sqlite3-parser/.github/dependabot.yml
@@ -0,0 +1,11 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "cargo" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/vendored/sqlite3-parser/.github/workflows/rust.yml b/vendored/sqlite3-parser/.github/workflows/rust.yml new file mode 100644 index 0000000000..49c89d65ac --- /dev/null +++ b/vendored/sqlite3-parser/.github/workflows/rust.yml @@ -0,0 +1,21 @@ +name: CI + +on: [push, pull_request] + +permissions: + contents: read + +jobs: + build: + strategy: + matrix: + os: [ubuntu-latest, windows-latest] + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v3 + - name: Build + run: cargo build + - name: Run tests + run: cargo test diff --git a/vendored/sqlite3-parser/.gitignore b/vendored/sqlite3-parser/.gitignore new file mode 100644 index 0000000000..dc98fe4b66 --- /dev/null +++ b/vendored/sqlite3-parser/.gitignore @@ -0,0 +1,6 @@ +target +rlemon +*.h +*.out +Cargo.lock +cmake-build-debug \ No newline at end of file diff --git a/vendored/sqlite3-parser/CMakeLists.txt b/vendored/sqlite3-parser/CMakeLists.txt new file mode 100644 index 0000000000..466dbc70a6 --- /dev/null +++ b/vendored/sqlite3-parser/CMakeLists.txt @@ -0,0 +1,5 @@ +cmake_minimum_required(VERSION 3.6) +project(rlemon) + +set(SOURCE_FILES third_party/lemon/lemon.c) +add_executable(rlemon ${SOURCE_FILES}) \ No newline at end of file diff --git a/vendored/sqlite3-parser/Cargo.toml b/vendored/sqlite3-parser/Cargo.toml new file mode 100644 index 0000000000..02939eafce --- /dev/null +++ b/vendored/sqlite3-parser/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "sqlite3-parser" +version = "0.11.0" +edition = "2021" +authors = ["gwenn"] +description = "SQL parser (as understood by SQLite)" +documentation = "http://docs.rs/sqlite3-parser" +repository = "https://github.com/gwenn/lemon-rs" +readme = "README.md" +categories = ["parser-implementations"] +keywords = ["sql", "parser", "scanner", "tokenizer"] +license = "Apache-2.0/MIT" +build = "build.rs" # Lemon preprocessing + +[badges] +maintenance = { status = "experimental" } + +[features] +# FIXME: specific to one parser, not global +YYTRACKMAXSTACKDEPTH = [] +YYNOERRORRECOVERY = [] +YYSTACKDYNAMIC = [] +YYCOVERAGE = [] +NDEBUG = [] +default = ["YYNOERRORRECOVERY"] + +[dependencies] +phf = { version = "0.11", features = ["uncased"] } +log = "0.4" +memchr = "2.0" +fallible-iterator = "0.3" +smallvec = ">=1.6.1" +bitflags = "2.0" +uncased = "0.9" +indexmap = "2.0" + +[dev-dependencies] +env_logger = { version = "0.10", default-features = false } + +[build-dependencies] +cc = "1.0" +phf_shared = { version = "0.11", features = ["uncased"] } +phf_codegen = "0.11" +uncased = "0.9" diff --git a/vendored/sqlite3-parser/LICENSE b/vendored/sqlite3-parser/LICENSE new file mode 100644 index 0000000000..cf1ab25da0 --- /dev/null +++ b/vendored/sqlite3-parser/LICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. 
We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <https://unlicense.org>
diff --git a/vendored/sqlite3-parser/README.md b/vendored/sqlite3-parser/README.md
new file mode 100644
index 0000000000..5f756b1346
--- /dev/null
+++ b/vendored/sqlite3-parser/README.md
@@ -0,0 +1,74 @@
+[![Build Status](https://github.com/gwenn/lemon-rs/workflows/CI/badge.svg)](https://github.com/gwenn/lemon-rs/actions)
+[![Latest Version](https://img.shields.io/crates/v/sqlite3-parser.svg)](https://crates.io/crates/sqlite3-parser)
+[![Docs](https://docs.rs/sqlite3-parser/badge.svg)](https://docs.rs/sqlite3-parser)
+[![dependency status](https://deps.rs/repo/github/gwenn/lemon-rs/status.svg)](https://deps.rs/repo/github/gwenn/lemon-rs)
+
+[LEMON parser generator](https://www.sqlite.org/src/doc/trunk/doc/lemon.html) modified to generate Rust code.
+
+Lemon source and SQLite3 grammar were last synced as of May 2022.
+
+## Unsupported
+
+### Unsupported Grammar syntax
+
+* `%token_destructor`: Code to execute to destroy token data
+* `%default_destructor`: Code for the default non-terminal destructor
+* `%destructor`: Code which executes whenever this symbol is
+  popped from the stack during error processing
+
+https://www.codeproject.com/Articles/1056460/Generating-a-High-Speed-Parser-Part-Lemon
+https://www.sqlite.org/lemon.html
+
+### SQLite
+
+[SQLite lexer](http://www.sqlite.org/src/artifact?ci=trunk&filename=src/tokenize.c) and [SQLite parser](http://www.sqlite.org/src/artifact?ci=trunk&filename=src/parse.y) have been ported from C to Rust.
+The parser generates an AST.
+
+Lexer/Parser:
+ - Keep track of position (line, column).
+ - Streamable (stop at the end of statement).
+ - Resumable (restart after the end of statement).
+
+Lexer and parser have been tested with the following scripts:
+ * https://github.com/bkiers/sqlite-parser/tree/master/src/test/resources
+ * https://github.com/codeschool/sqlite-parser/tree/master/test/sql/official-suite which can be updated with script in https://github.com/codeschool/sqlite-parser/tree/master/test/misc
+
+TODO:
+ - [ ] Check generated AST (reparse/reinject)
+ - [ ] [If a keyword in double quotes is used in a context where it cannot be resolved to an identifier but where a string literal is allowed, then the token is understood to be a string literal instead of an identifier.](https://sqlite.org/lang_keywords.html)
+ - [ ] Tests
+ - [ ] Do not panic while parsing
+ - [ ] CREATE VIRTUAL TABLE args
+ - [ ] Zero copy (at least tokens)
+
+### Unsupported by Rust
+
+* `#line` directive
+
+## API change
+
+* No `ParseAlloc`/`ParseFree` anymore
+
+## Features not tested
+
+* NDEBUG
+* YYNOERRORRECOVERY
+* YYERRORSYMBOL
+
+## To be fixed
+
+* RHS are moved. Maybe it is not a problem if they are always used once.
+  Just add a check in lemon...
+* `%extra_argument` is not supported.
+* Terminal symbols generated by lemon should be dumped in a specified file.
+
+## Raison d'ĂȘtre
+
+* [lemon_rust](https://github.com/rodrigorc/lemon_rust) does the same thing
+but with an old version of `lemon`. And it does not seem possible to use `yystack`
+as a stack because items may be accessed randomly and the `top+1` item can be used.
+
+* [lalrpop](https://github.com/nikomatsakis/lalrpop) would be the perfect
+alternative but it does not support fallback/streaming
+(see [this](https://github.com/nikomatsakis/lalrpop/issues/156) issue)
+and compilation/generation is slow.
diff --git a/vendored/sqlite3-parser/Sync.md b/vendored/sqlite3-parser/Sync.md
new file mode 100644
index 0000000000..5bd0ff4a7c
--- /dev/null
+++ b/vendored/sqlite3-parser/Sync.md
@@ -0,0 +1,7 @@
+When some changes happen in the official SQLite repository,
+they can be applied locally:
+ - $SQLITE/tool/lemon.c => $RLEMON/third_party/lemon.c
+ - $SQLITE/tool/lempar.c => $RLEMON/third_party/lempar.rs
+ - $SQLITE/tool/mkkeywordhash.c => $RLEMON/src/dialect/mod.rs
+ - $SQLITE/src/tokenize.c => $RLEMON/src/lexer/sql/mod.rs
+ - $SQLITE/src/parse.y => $RLEMON/src/parser/parse.y (and $RLEMON/src/dialect/token.rs, $RLEMON/src/dialect/mod.rs)
diff --git a/vendored/sqlite3-parser/benches/keyword.rs b/vendored/sqlite3-parser/benches/keyword.rs
new file mode 100644
index 0000000000..9f106d50d8
--- /dev/null
+++ b/vendored/sqlite3-parser/benches/keyword.rs
@@ -0,0 +1,153 @@
+#![feature(test)]
+extern crate test;
+
+use sqlite3_parser::dialect::keyword_token;
+use test::Bencher;
+
+static VALUES: [&[u8]; 136] = [
+    b"ABORT",
+    b"ACTION",
+    b"ADD",
+    b"AFTER",
+    b"ALL",
+    b"ALTER",
+    b"ANALYZE",
+    b"AND",
+    b"AS",
+    b"ASC",
+    b"ATTACH",
+    b"AUTOINCREMENT",
+    b"BEFORE",
+    b"BEGIN",
+    b"BETWEEN",
+    b"BY",
+    b"CASCADE",
+    b"CASE",
+    b"CAST",
+    b"CHECK",
+    b"COLLATE",
+    b"COLUMN",
+    b"COMMIT",
+    b"CONFLICT",
+    b"CONSTRAINT",
+    b"CREATE",
+    b"CROSS",
+    b"CURRENT",
+    b"CURRENT_DATE",
+    b"CURRENT_TIME",
+    b"CURRENT_TIMESTAMP",
+    b"DATABASE",
+    b"DEFAULT",
+    b"DEFERRABLE",
+    b"DEFERRED",
+    b"DELETE",
+    b"DESC",
+    b"DETACH",
+    b"DISTINCT",
+    b"DO",
+    b"DROP",
+    b"EACH",
+    b"ELSE",
+    b"END",
+    b"ESCAPE",
+    b"EXCEPT",
+    b"EXCLUSIVE",
+    b"EXISTS",
+    b"EXPLAIN",
+    b"FAIL",
+    b"FILTER",
+    b"FOLLOWING",
+    b"FOR",
+    b"FOREIGN",
+    b"FROM",
+    b"FULL",
+    b"GLOB",
+    b"GROUP",
+    b"HAVING",
+    b"IF",
+    b"IGNORE",
+    b"IMMEDIATE",
+    b"IN",
+    b"INDEX",
+    b"INDEXED",
+    b"INITIALLY",
+    b"INNER",
+    b"INSERT",
+    b"INSTEAD",
+    b"INTERSECT",
+    b"INTO",
+    b"IS",
+    b"ISNULL",
+    b"JOIN",
+    b"KEY",
+    b"LEFT",
+    b"LIKE",
+    b"LIMIT",
+    b"MATCH",
+    b"NATURAL",
+    b"NO",
+    b"NOT",
+    b"NOTHING",
+    b"NOTNULL",
+    b"NULL",
+    b"OF",
+    b"OFFSET",
+    b"ON",
+    b"OR",
+    b"ORDER",
+    b"OUTER",
+    b"OVER",
+    b"PARTITION",
+    b"PLAN",
+    b"PRAGMA",
+    b"PRECEDING",
+    b"PRIMARY",
+    b"QUERY",
+    b"RAISE",
+    b"RANGE",
+    b"RECURSIVE",
+    b"REFERENCES",
+    b"REGEXP",
+    b"REINDEX",
+    b"RELEASE",
+    b"RENAME",
+    b"REPLACE",
+    b"RESTRICT",
+    b"RIGHT",
+    b"ROLLBACK",
+    b"ROW",
+    b"ROWS",
+    b"SAVEPOINT",
+    b"SELECT",
+    b"SET",
+    b"TABLE",
+    b"TEMP",
+    b"TEMPORARY",
+    b"THEN",
+    b"TO",
+    b"TRANSACTION",
+    b"TRIGGER",
+    b"UNBOUNDED",
+    b"UNION",
+    b"UNIQUE",
+    b"UPDATE",
+    b"USING",
+    b"VACUUM",
+    b"VALUES",
+    b"VIEW",
+    b"VIRTUAL",
+    b"WHEN",
+    b"WHERE",
+    b"WINDOW",
+    b"WITH",
+    b"WITHOUT",
+];
+
+#[bench]
+fn bench_keyword_token(b: &mut Bencher) {
+    b.iter(|| {
+        for value in VALUES.iter() {
+            assert!(keyword_token(value).is_some())
+        }
+    });
+}
diff --git a/vendored/sqlite3-parser/build.rs
b/vendored/sqlite3-parser/build.rs new file mode 100644 index 0000000000..1ec389e180 --- /dev/null +++ b/vendored/sqlite3-parser/build.rs @@ -0,0 +1,212 @@ +use std::env; +use std::fs::File; +use std::io::{BufWriter, Result, Write}; +use std::path::Path; +use std::process::Command; + +use cc::Build; +use uncased::UncasedStr; + +fn main() -> Result<()> { + let out_dir = env::var("OUT_DIR").unwrap(); + let out_path = Path::new(&out_dir); + let rlemon = out_path.join("rlemon"); + + let lemon_src_dir = Path::new("third_party").join("lemon"); + let rlemon_src = lemon_src_dir.join("lemon.c"); + + // compile rlemon: + { + assert!(Build::new() + .target(&env::var("HOST").unwrap()) + .get_compiler() + .to_command() + .arg("-o") + .arg(rlemon.clone()) + .arg(rlemon_src) + .status()? + .success()); + } + + let sql_parser = "src/parser/parse.y"; + // run rlemon / generate parser: + { + assert!(Command::new(rlemon) + .arg("-DSQLITE_ENABLE_UPDATE_DELETE_LIMIT") + .arg("-Tthird_party/lemon/lempar.rs") + .arg(format!("-d{out_dir}")) + .arg(sql_parser) + .status()? + .success()); + // TODO ./rlemon -m -Tthird_party/lemon/lempar.rs examples/simple.y + } + + let keywords = out_path.join("keywords.rs"); + let mut keywords = BufWriter::new(File::create(keywords)?); + write!( + &mut keywords, + "static KEYWORDS: ::phf::Map<&'static UncasedStr, TokenType> = \n{};", + phf_codegen::Map::new() + .entry(UncasedStr::new("ABORT"), "TokenType::TK_ABORT") + .entry(UncasedStr::new("ACTION"), "TokenType::TK_ACTION") + .entry(UncasedStr::new("ADD"), "TokenType::TK_ADD") + .entry(UncasedStr::new("AFTER"), "TokenType::TK_AFTER") + .entry(UncasedStr::new("ALL"), "TokenType::TK_ALL") + .entry(UncasedStr::new("ALTER"), "TokenType::TK_ALTER") + .entry(UncasedStr::new("ALWAYS"), "TokenType::TK_ALWAYS") + .entry(UncasedStr::new("ANALYZE"), "TokenType::TK_ANALYZE") + .entry(UncasedStr::new("AND"), "TokenType::TK_AND") + .entry(UncasedStr::new("AS"), "TokenType::TK_AS") + .entry(UncasedStr::new("ASC"), "TokenType::TK_ASC") + .entry(UncasedStr::new("ATTACH"), "TokenType::TK_ATTACH") + .entry(UncasedStr::new("AUTOINCREMENT"), "TokenType::TK_AUTOINCR") + .entry(UncasedStr::new("BEFORE"), "TokenType::TK_BEFORE") + .entry(UncasedStr::new("BEGIN"), "TokenType::TK_BEGIN") + .entry(UncasedStr::new("BETWEEN"), "TokenType::TK_BETWEEN") + .entry(UncasedStr::new("BY"), "TokenType::TK_BY") + .entry(UncasedStr::new("CASCADE"), "TokenType::TK_CASCADE") + .entry(UncasedStr::new("CASE"), "TokenType::TK_CASE") + .entry(UncasedStr::new("CAST"), "TokenType::TK_CAST") + .entry(UncasedStr::new("CHECK"), "TokenType::TK_CHECK") + .entry(UncasedStr::new("COLLATE"), "TokenType::TK_COLLATE") + .entry(UncasedStr::new("COLUMN"), "TokenType::TK_COLUMNKW") + .entry(UncasedStr::new("COMMIT"), "TokenType::TK_COMMIT") + .entry(UncasedStr::new("CONFLICT"), "TokenType::TK_CONFLICT") + .entry(UncasedStr::new("CONSTRAINT"), "TokenType::TK_CONSTRAINT") + .entry(UncasedStr::new("CREATE"), "TokenType::TK_CREATE") + .entry(UncasedStr::new("CROSS"), "TokenType::TK_JOIN_KW") + .entry(UncasedStr::new("CURRENT"), "TokenType::TK_CURRENT") + .entry(UncasedStr::new("CURRENT_DATE"), "TokenType::TK_CTIME_KW") + .entry(UncasedStr::new("CURRENT_TIME"), "TokenType::TK_CTIME_KW") + .entry( + UncasedStr::new("CURRENT_TIMESTAMP"), + "TokenType::TK_CTIME_KW" + ) + .entry(UncasedStr::new("DATABASE"), "TokenType::TK_DATABASE") + .entry(UncasedStr::new("DEFAULT"), "TokenType::TK_DEFAULT") + .entry(UncasedStr::new("DEFERRABLE"), "TokenType::TK_DEFERRABLE") + .entry(UncasedStr::new("DEFERRED"), 
"TokenType::TK_DEFERRED") + .entry(UncasedStr::new("DELETE"), "TokenType::TK_DELETE") + .entry(UncasedStr::new("DESC"), "TokenType::TK_DESC") + .entry(UncasedStr::new("DETACH"), "TokenType::TK_DETACH") + .entry(UncasedStr::new("DISTINCT"), "TokenType::TK_DISTINCT") + .entry(UncasedStr::new("DO"), "TokenType::TK_DO") + .entry(UncasedStr::new("DROP"), "TokenType::TK_DROP") + .entry(UncasedStr::new("EACH"), "TokenType::TK_EACH") + .entry(UncasedStr::new("ELSE"), "TokenType::TK_ELSE") + .entry(UncasedStr::new("END"), "TokenType::TK_END") + .entry(UncasedStr::new("ESCAPE"), "TokenType::TK_ESCAPE") + .entry(UncasedStr::new("EXCEPT"), "TokenType::TK_EXCEPT") + .entry(UncasedStr::new("EXCLUDE"), "TokenType::TK_EXCLUDE") + .entry(UncasedStr::new("EXCLUSIVE"), "TokenType::TK_EXCLUSIVE") + .entry(UncasedStr::new("EXISTS"), "TokenType::TK_EXISTS") + .entry(UncasedStr::new("EXPLAIN"), "TokenType::TK_EXPLAIN") + .entry(UncasedStr::new("FAIL"), "TokenType::TK_FAIL") + .entry(UncasedStr::new("FILTER"), "TokenType::TK_FILTER") + .entry(UncasedStr::new("FIRST"), "TokenType::TK_FIRST") + .entry(UncasedStr::new("FOLLOWING"), "TokenType::TK_FOLLOWING") + .entry(UncasedStr::new("FOR"), "TokenType::TK_FOR") + .entry(UncasedStr::new("FOREIGN"), "TokenType::TK_FOREIGN") + .entry(UncasedStr::new("FROM"), "TokenType::TK_FROM") + .entry(UncasedStr::new("FULL"), "TokenType::TK_JOIN_KW") + .entry(UncasedStr::new("GENERATED"), "TokenType::TK_GENERATED") + .entry(UncasedStr::new("GLOB"), "TokenType::TK_LIKE_KW") + .entry(UncasedStr::new("GROUP"), "TokenType::TK_GROUP") + .entry(UncasedStr::new("GROUPS"), "TokenType::TK_GROUPS") + .entry(UncasedStr::new("HAVING"), "TokenType::TK_HAVING") + .entry(UncasedStr::new("IF"), "TokenType::TK_IF") + .entry(UncasedStr::new("IGNORE"), "TokenType::TK_IGNORE") + .entry(UncasedStr::new("IMMEDIATE"), "TokenType::TK_IMMEDIATE") + .entry(UncasedStr::new("IN"), "TokenType::TK_IN") + .entry(UncasedStr::new("INDEX"), "TokenType::TK_INDEX") + .entry(UncasedStr::new("INDEXED"), "TokenType::TK_INDEXED") + .entry(UncasedStr::new("INITIALLY"), "TokenType::TK_INITIALLY") + .entry(UncasedStr::new("INNER"), "TokenType::TK_JOIN_KW") + .entry(UncasedStr::new("INSERT"), "TokenType::TK_INSERT") + .entry(UncasedStr::new("INSTEAD"), "TokenType::TK_INSTEAD") + .entry(UncasedStr::new("INTERSECT"), "TokenType::TK_INTERSECT") + .entry(UncasedStr::new("INTO"), "TokenType::TK_INTO") + .entry(UncasedStr::new("IS"), "TokenType::TK_IS") + .entry(UncasedStr::new("ISNULL"), "TokenType::TK_ISNULL") + .entry(UncasedStr::new("JOIN"), "TokenType::TK_JOIN") + .entry(UncasedStr::new("KEY"), "TokenType::TK_KEY") + .entry(UncasedStr::new("LAST"), "TokenType::TK_LAST") + .entry(UncasedStr::new("LEFT"), "TokenType::TK_JOIN_KW") + .entry(UncasedStr::new("LIKE"), "TokenType::TK_LIKE_KW") + .entry(UncasedStr::new("LIMIT"), "TokenType::TK_LIMIT") + .entry(UncasedStr::new("MATCH"), "TokenType::TK_MATCH") + .entry( + UncasedStr::new("MATERIALIZED"), + "TokenType::TK_MATERIALIZED" + ) + .entry(UncasedStr::new("NATURAL"), "TokenType::TK_JOIN_KW") + .entry(UncasedStr::new("NO"), "TokenType::TK_NO") + .entry(UncasedStr::new("NOT"), "TokenType::TK_NOT") + .entry(UncasedStr::new("NOTHING"), "TokenType::TK_NOTHING") + .entry(UncasedStr::new("NOTNULL"), "TokenType::TK_NOTNULL") + .entry(UncasedStr::new("NULL"), "TokenType::TK_NULL") + .entry(UncasedStr::new("NULLS"), "TokenType::TK_NULLS") + .entry(UncasedStr::new("OF"), "TokenType::TK_OF") + .entry(UncasedStr::new("OFFSET"), "TokenType::TK_OFFSET") + .entry(UncasedStr::new("ON"), 
"TokenType::TK_ON") + .entry(UncasedStr::new("OR"), "TokenType::TK_OR") + .entry(UncasedStr::new("ORDER"), "TokenType::TK_ORDER") + .entry(UncasedStr::new("OTHERS"), "TokenType::TK_OTHERS") + .entry(UncasedStr::new("OUTER"), "TokenType::TK_JOIN_KW") + .entry(UncasedStr::new("OVER"), "TokenType::TK_OVER") + .entry(UncasedStr::new("PARTITION"), "TokenType::TK_PARTITION") + .entry(UncasedStr::new("PLAN"), "TokenType::TK_PLAN") + .entry(UncasedStr::new("PRAGMA"), "TokenType::TK_PRAGMA") + .entry(UncasedStr::new("PRECEDING"), "TokenType::TK_PRECEDING") + .entry(UncasedStr::new("PRIMARY"), "TokenType::TK_PRIMARY") + .entry(UncasedStr::new("QUERY"), "TokenType::TK_QUERY") + .entry(UncasedStr::new("RAISE"), "TokenType::TK_RAISE") + .entry(UncasedStr::new("RANGE"), "TokenType::TK_RANGE") + .entry(UncasedStr::new("READONLY"), "TokenType::TK_READONLY") + .entry(UncasedStr::new("RECURSIVE"), "TokenType::TK_RECURSIVE") + .entry(UncasedStr::new("REFERENCES"), "TokenType::TK_REFERENCES") + .entry(UncasedStr::new("REGEXP"), "TokenType::TK_LIKE_KW") + .entry(UncasedStr::new("REINDEX"), "TokenType::TK_REINDEX") + .entry(UncasedStr::new("RELEASE"), "TokenType::TK_RELEASE") + .entry(UncasedStr::new("RENAME"), "TokenType::TK_RENAME") + .entry(UncasedStr::new("REPLACE"), "TokenType::TK_REPLACE") + .entry(UncasedStr::new("RETURNING"), "TokenType::TK_RETURNING") + .entry(UncasedStr::new("RESTRICT"), "TokenType::TK_RESTRICT") + .entry(UncasedStr::new("RIGHT"), "TokenType::TK_JOIN_KW") + .entry(UncasedStr::new("ROLLBACK"), "TokenType::TK_ROLLBACK") + .entry(UncasedStr::new("ROW"), "TokenType::TK_ROW") + .entry(UncasedStr::new("ROWS"), "TokenType::TK_ROWS") + .entry(UncasedStr::new("SAVEPOINT"), "TokenType::TK_SAVEPOINT") + .entry(UncasedStr::new("SELECT"), "TokenType::TK_SELECT") + .entry(UncasedStr::new("SET"), "TokenType::TK_SET") + .entry(UncasedStr::new("TABLE"), "TokenType::TK_TABLE") + .entry(UncasedStr::new("TEMP"), "TokenType::TK_TEMP") + .entry(UncasedStr::new("TEMPORARY"), "TokenType::TK_TEMP") + .entry(UncasedStr::new("THEN"), "TokenType::TK_THEN") + .entry(UncasedStr::new("TIES"), "TokenType::TK_TIES") + .entry(UncasedStr::new("TO"), "TokenType::TK_TO") + .entry(UncasedStr::new("TRANSACTION"), "TokenType::TK_TRANSACTION") + .entry(UncasedStr::new("TRIGGER"), "TokenType::TK_TRIGGER") + .entry(UncasedStr::new("UNBOUNDED"), "TokenType::TK_UNBOUNDED") + .entry(UncasedStr::new("UNION"), "TokenType::TK_UNION") + .entry(UncasedStr::new("UNIQUE"), "TokenType::TK_UNIQUE") + .entry(UncasedStr::new("UPDATE"), "TokenType::TK_UPDATE") + .entry(UncasedStr::new("USING"), "TokenType::TK_USING") + .entry(UncasedStr::new("VACUUM"), "TokenType::TK_VACUUM") + .entry(UncasedStr::new("VALUES"), "TokenType::TK_VALUES") + .entry(UncasedStr::new("VIEW"), "TokenType::TK_VIEW") + .entry(UncasedStr::new("VIRTUAL"), "TokenType::TK_VIRTUAL") + .entry(UncasedStr::new("WHEN"), "TokenType::TK_WHEN") + .entry(UncasedStr::new("WHERE"), "TokenType::TK_WHERE") + .entry(UncasedStr::new("WINDOW"), "TokenType::TK_WINDOW") + .entry(UncasedStr::new("WITH"), "TokenType::TK_WITH") + .entry(UncasedStr::new("WITHOUT"), "TokenType::TK_WITHOUT") + .build() + )?; + + println!("cargo:rerun-if-changed=third_party/lemon/lemon.c"); + println!("cargo:rerun-if-changed=third_party/lemon/lempar.rs"); + println!("cargo:rerun-if-changed=src/parser/parse.y"); + // TODO examples/simple.y if test + Ok(()) +} diff --git a/vendored/sqlite3-parser/examples/simple.y b/vendored/sqlite3-parser/examples/simple.y new file mode 100644 index 0000000000..835418deb8 --- 
/dev/null
+++ b/vendored/sqlite3-parser/examples/simple.y
@@ -0,0 +1,123 @@
+%token_type { i32 }
+
+// An extra argument to the constructor for the parser, which is available
+// to all actions.
+%extra_context {ctx: Context}
+
+%left PLUS MINUS.
+%left DIVIDE TIMES.
+
+%include {
+
+use log::{debug, error, log_enabled, Level, LevelFilter, Metadata, Record, SetLoggerError};
+
+pub struct Context {
+    expr: Option<Expr>,
+}
+
+#[derive(Debug)]
+pub enum Operator {
+    Add,
+    Substract,
+    Multiply,
+    Divide,
+}
+
+#[derive(Debug)]
+pub enum Expr {
+    Number(i32),
+    Binary(Operator, Box<Expr>, Box<Expr>),
+}
+impl Expr {
+    fn binary(op: Operator, lhs: Expr, rhs: Expr) -> Expr {
+        Expr::Binary(op, Box::new(lhs), Box::new(rhs))
+    }
+}
+
+fn main() {
+    init_logger().unwrap();
+
+    let r = Context { expr: None };
+    let mut p = yyParser::new(r);
+    p.Parse(TokenType::INTEGER, Some(5));
+    p.Parse(TokenType::PLUS, None);
+    p.Parse(TokenType::INTEGER, Some(10));
+    p.Parse(TokenType::TIMES, None);
+    p.Parse(TokenType::INTEGER, Some(4));
+    p.Parse(TokenType::EOF, None);
+    p.ParseFinalize();
+    let s = format!("{:?}", p.ctx.expr);
+    assert_eq!(s, "Some(Binary(Add, Number(5), Binary(Multiply, Number(10), Number(4))))");
+
+    let r = Context { expr: None };
+    let mut p = yyParser::new(r);
+    p.Parse(TokenType::INTEGER, Some(15));
+    p.Parse(TokenType::DIVIDE, None);
+    p.Parse(TokenType::INTEGER, Some(5));
+    p.Parse(TokenType::EOF, None);
+    p.ParseFinalize();
+    let s = format!("{:?}", p.ctx.expr);
+    assert_eq!(s, "Some(Binary(Divide, Number(15), Number(5)))");
+
+    let r = Context { expr: None };
+    let mut p = yyParser::new(r);
+    p.Parse(TokenType::INTEGER, Some(50));
+    p.Parse(TokenType::PLUS, None);
+    p.Parse(TokenType::INTEGER, Some(125));
+    p.Parse(TokenType::EOF, None);
+    p.ParseFinalize();
+    let s = format!("{:?}", p.ctx.expr);
+    assert_eq!(s, "Some(Binary(Add, Number(50), Number(125)))");
+
+    let r = Context { expr: None };
+    let mut p = yyParser::new(r);
+    p.Parse(TokenType::INTEGER, Some(50));
+    p.Parse(TokenType::TIMES, None);
+    p.Parse(TokenType::INTEGER, Some(125));
+    p.Parse(TokenType::PLUS, None);
+    p.Parse(TokenType::INTEGER, Some(125));
+    p.Parse(TokenType::EOF, None);
+    p.ParseFinalize();
+    let s = format!("{:?}", p.ctx.expr);
+    assert_eq!(s, "Some(Binary(Add, Binary(Multiply, Number(50), Number(125)), Number(125)))");
+}
+
+static LOGGER: Logger = Logger;
+struct Logger;
+
+impl log::Log for Logger {
+    fn enabled(&self, metadata: &Metadata) -> bool {
+        metadata.level() <= Level::Debug
+    }
+
+    fn log(&self, record: &Record) {
+        if self.enabled(record.metadata()) {
+            eprintln!("{} - {}", record.level(), record.args());
+        }
+    }
+
+    fn flush(&self) {
+    }
+}
+
+fn init_logger() -> Result<(), SetLoggerError> {
+    log::set_logger(&LOGGER)?;
+    log::set_max_level(LevelFilter::Debug);
+    Ok(())
+}
+}
+
+%syntax_error {
+    let _ = yymajor;
+    println!("near token {:?}: syntax error", yyminor);
+}
+
+program ::= expr(A). { self.ctx.expr = Some(A); }
+
+%type expr { Expr }
+expr(A) ::= expr(B) MINUS expr(C). { A = Expr::binary(Operator::Substract, B, C); }
+expr(A) ::= expr(B) PLUS expr(C). { A = Expr::binary(Operator::Add, B, C); }
+expr(A) ::= expr(B) TIMES expr(C). { A = Expr::binary(Operator::Multiply, B, C); }
+expr(A) ::= expr(B) DIVIDE expr(C). { A = Expr::binary(Operator::Divide, B, C); }
+
+expr(A) ::= INTEGER(B).
{ A = Expr::Number(B.unwrap()); } diff --git a/vendored/sqlite3-parser/examples/sql_check.rs b/vendored/sqlite3-parser/examples/sql_check.rs new file mode 100644 index 0000000000..1e7b05cf76 --- /dev/null +++ b/vendored/sqlite3-parser/examples/sql_check.rs @@ -0,0 +1,55 @@ +use fallible_iterator::FallibleIterator; +use std::env; +use std::fs::read; +use std::panic; + +use sqlite3_parser::lexer::sql::Parser; + +/// Parse specified files and check all commands. +fn main() { + env_logger::init(); + let args = env::args(); + for arg in args.skip(1) { + println!("{arg}"); + let result = panic::catch_unwind(|| { + let input = read(arg.clone()).unwrap(); + let mut parser = Parser::new(&input); + loop { + match parser.next() { + Ok(None) => break, + Err(err) => { + eprintln!("Err: {err} in {arg}"); + break; + } + Ok(Some(cmd)) => { + let input = cmd.to_string(); + let mut checker = Parser::new(input.as_bytes()); + match checker.next() { + Err(err) => { + eprintln!( + "Check Err in {}:{}, {} in\n{}\n{:?}", + arg, + parser.line(), + err, + input, + cmd + ); + } + Ok(None) => { + eprintln!("Check Err in {}:{}, {:?}", arg, parser.line(), cmd); + } + Ok(Some(check)) => { + if cmd != check { + eprintln!("{cmd:?}\n<>\n{check:?}"); + } + } + } + } + } + } + }); + if let Err(e) = result { + eprintln!("Panic: {e:?} in {arg}"); + } + } +} diff --git a/vendored/sqlite3-parser/examples/sql_cmd.rs b/vendored/sqlite3-parser/examples/sql_cmd.rs new file mode 100644 index 0000000000..11b6b39b0d --- /dev/null +++ b/vendored/sqlite3-parser/examples/sql_cmd.rs @@ -0,0 +1,22 @@ +use fallible_iterator::FallibleIterator; +use sqlite3_parser::lexer::sql::Parser; + +/// Parse a string. +// RUST_LOG=sqlite3Parser=debug +fn main() { + env_logger::init(); + let arg = "PRAGMA parser_trace=ON;"; + let mut parser = Parser::new(arg.as_bytes()); + loop { + match parser.next() { + Ok(None) => break, + Err(err) => { + eprintln!("Err: {err} in {arg}"); + break; + } + Ok(Some(cmd)) => { + println!("{cmd}"); + } + } + } +} diff --git a/vendored/sqlite3-parser/examples/sql_cmds.rs b/vendored/sqlite3-parser/examples/sql_cmds.rs new file mode 100644 index 0000000000..8ebdeb09bd --- /dev/null +++ b/vendored/sqlite3-parser/examples/sql_cmds.rs @@ -0,0 +1,34 @@ +use fallible_iterator::FallibleIterator; +use std::env; +use std::fs::read; +use std::panic; + +use sqlite3_parser::lexer::sql::Parser; + +/// Parse specified files and print all commands. 
+fn main() {
+    env_logger::init();
+    let args = env::args();
+    for arg in args.skip(1) {
+        println!("{arg}");
+        let result = panic::catch_unwind(|| {
+            let input = read(arg.clone()).unwrap();
+            let mut parser = Parser::new(input.as_ref());
+            loop {
+                match parser.next() {
+                    Ok(None) => break,
+                    Err(err) => {
+                        eprintln!("Err: {err} in {arg}");
+                        break;
+                    }
+                    Ok(Some(cmd)) => {
+                        println!("{cmd}");
+                    }
+                }
+            }
+        });
+        if let Err(e) = result {
+            eprintln!("Panic: {e:?} in {arg}");
+        }
+    }
+}
diff --git a/vendored/sqlite3-parser/examples/sql_tokens.rs b/vendored/sqlite3-parser/examples/sql_tokens.rs
new file mode 100644
index 0000000000..a5cb931c1f
--- /dev/null
+++ b/vendored/sqlite3-parser/examples/sql_tokens.rs
@@ -0,0 +1,91 @@
+use sqlite3_parser::lexer::sql::{TokenType, Tokenizer};
+use sqlite3_parser::lexer::Scanner;
+
+use std::env;
+use std::fs::read;
+use std::i64;
+use std::str;
+
+/// Tokenize specified files (and do some checks)
+fn main() {
+    use TokenType::*;
+    let args = env::args();
+    for arg in args.skip(1) {
+        let input = read(arg.clone()).unwrap();
+        let tokenizer = Tokenizer::new();
+        let mut s = Scanner::new(tokenizer);
+        loop {
+            match s.scan(&input) {
+                Ok((_, None, _)) => break,
+                Err(err) => {
+                    //eprintln!("{} at line: {}, column: {}", err, s.line(), s.column());
+                    eprintln!("Err: {err} in {arg}");
+                    break;
+                }
+                Ok((_, Some((token, token_type)), _)) => match token_type {
+                    TK_TEMP => debug_assert!(
+                        b"TEMP".eq_ignore_ascii_case(token)
+                            || b"TEMPORARY".eq_ignore_ascii_case(token)
+                    ),
+                    TK_EQ => debug_assert!(b"=" == token || b"==" == token),
+                    TK_NE => debug_assert!(b"<>" == token || b"!=" == token),
+                    //TK_STRING => debug_assert!(),
+                    //TK_ID => debug_assert!(),
+                    //TK_VARIABLE => debug_assert!(),
+                    TK_BLOB => debug_assert!(
+                        token.len() % 2 == 0 && token.iter().all(|b| b.is_ascii_hexdigit())
+                    ),
+                    TK_INTEGER => {
+                        if token.len() > 2
+                            && token[0] == b'0'
+                            && (token[1] == b'x' || token[1] == b'X')
+                        {
+                            if let Err(err) =
+                                i64::from_str_radix(str::from_utf8(&token[2..]).unwrap(), 16)
+                            {
+                                eprintln!("Err: {err} in {arg}");
+                            }
+                        } else {
+                            /*let raw = str::from_utf8(token).unwrap();
+                            let res = raw.parse::<i64>();
+                            if res.is_err() {
+                                eprintln!("Err: {} in {}", res.unwrap_err(), arg);
+                            }*/
+                            debug_assert!(token.iter().all(|b| b.is_ascii_digit()))
+                        }
+                    }
+                    TK_FLOAT => {
+                        debug_assert!(str::from_utf8(token).unwrap().parse::<f64>().is_ok())
+                    }
+                    TK_CTIME_KW => debug_assert!(
+                        b"CURRENT_DATE".eq_ignore_ascii_case(token)
+                            || b"CURRENT_TIME".eq_ignore_ascii_case(token)
+                            || b"CURRENT_TIMESTAMP".eq_ignore_ascii_case(token)
+                    ),
+                    TK_JOIN_KW => debug_assert!(
+                        b"CROSS".eq_ignore_ascii_case(token)
+                            || b"FULL".eq_ignore_ascii_case(token)
+                            || b"INNER".eq_ignore_ascii_case(token)
+                            || b"LEFT".eq_ignore_ascii_case(token)
+                            || b"NATURAL".eq_ignore_ascii_case(token)
+                            || b"OUTER".eq_ignore_ascii_case(token)
+                            || b"RIGHT".eq_ignore_ascii_case(token)
+                    ),
+                    TK_LIKE_KW => debug_assert!(
+                        b"GLOB".eq_ignore_ascii_case(token)
+                            || b"LIKE".eq_ignore_ascii_case(token)
+                            || b"REGEXP".eq_ignore_ascii_case(token)
+                    ),
+                    _ => match token_type.as_str() {
+                        Some(str) => {
+                            debug_assert!(str.eq_ignore_ascii_case(str::from_utf8(token).unwrap()))
+                        }
+                        _ => {
+                            println!("'{}', {:?}", str::from_utf8(token).unwrap(), token_type);
+                        }
+                    },
+                },
+            }
+        }
+    }
+}
diff --git a/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml b/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml
new file mode 100644
index 0000000000..6c7b36198a
--- /dev/null
+++
b/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "sqlparser_bench" +version = "0.1.0" +authors = ["Dandandan "] +edition = "2018" + +[dependencies] +sqlite3-parser = { path = "..", default-features = false, features = ["YYNOERRORRECOVERY"] } +fallible-iterator = "0.3" + +[dev-dependencies] +criterion = "0.5" + +[[bench]] +name = "sqlparser_bench" +harness = false diff --git a/vendored/sqlite3-parser/sqlparser_bench/README.md b/vendored/sqlite3-parser/sqlparser_bench/README.md new file mode 100644 index 0000000000..6bdda8accf --- /dev/null +++ b/vendored/sqlite3-parser/sqlparser_bench/README.md @@ -0,0 +1 @@ +Adapted from https://github.com/ballista-compute/sqlparser-rs/tree/main/sqlparser_bench \ No newline at end of file diff --git a/vendored/sqlite3-parser/sqlparser_bench/benches/sqlparser_bench.rs b/vendored/sqlite3-parser/sqlparser_bench/benches/sqlparser_bench.rs new file mode 100644 index 0000000000..c069235864 --- /dev/null +++ b/vendored/sqlite3-parser/sqlparser_bench/benches/sqlparser_bench.rs @@ -0,0 +1,48 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use criterion::{criterion_group, criterion_main, Criterion}; +use fallible_iterator::FallibleIterator; +use sqlite3_parser::lexer::sql::Parser; + +fn basic_queries(c: &mut Criterion) { + let mut group = c.benchmark_group("sqlparser-rs parsing benchmark"); + + let string = "SELECT * FROM `table` WHERE 1 = 1"; + group.bench_function("sqlparser::select", |b| { + b.iter(|| { + let mut parser = Parser::new(string.as_bytes()); + parser.next() + }); + }); + + let with_query = " + WITH derived AS ( + SELECT MAX(a) AS max_a, + COUNT(b) AS b_num, + user_id + FROM `TABLE` + GROUP BY user_id + ) + SELECT * FROM `table` + LEFT JOIN derived USING (user_id) + "; + group.bench_function("sqlparser::with_select", |b| { + b.iter(|| { + let mut parser = Parser::new(with_query.as_bytes()); + parser.next() + }); + }); +} + +criterion_group!(benches, basic_queries); +criterion_main!(benches); diff --git a/vendored/sqlite3-parser/src/dialect/mod.rs b/vendored/sqlite3-parser/src/dialect/mod.rs new file mode 100644 index 0000000000..e0266f10e4 --- /dev/null +++ b/vendored/sqlite3-parser/src/dialect/mod.rs @@ -0,0 +1,405 @@ +//! 
SQLite dialect
+
+use std::fmt::Formatter;
+use std::str;
+use uncased::UncasedStr;
+
+mod token;
+pub use token::TokenType;
+
+/// Token value (lexeme)
+pub struct Token(pub usize, pub Option<String>, pub usize);
+
+pub(crate) fn sentinel(start: usize) -> Token {
+    Token(start, None, start)
+}
+
+impl Token {
+    pub fn unwrap(self) -> String {
+        self.1.unwrap()
+    }
+    pub fn take(&mut self) -> Self {
+        Token(self.0, self.1.take(), self.2)
+    }
+}
+
+impl std::fmt::Debug for Token {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_tuple("Token").field(&self.1).finish()
+    }
+}
+
+impl TokenType {
+    // TODO try Cow<&'static, str> (Borrowed<&'static str> for keyword and Owned for below),
+    // => Syntax error on keyword will be better
+    // => `from_token` will become unnecessary
+    pub(crate) fn to_token(self, start: usize, value: &[u8], end: usize) -> Token {
+        Token(
+            start,
+            match self {
+                TokenType::TK_CTIME_KW => Some(from_bytes(value)),
+                TokenType::TK_JOIN_KW => Some(from_bytes(value)),
+                TokenType::TK_LIKE_KW => Some(from_bytes(value)),
+                TokenType::TK_PTR => Some(from_bytes(value)),
+                // Identifiers
+                TokenType::TK_STRING => Some(from_bytes(value)),
+                TokenType::TK_ID => Some(from_bytes(value)),
+                TokenType::TK_VARIABLE => Some(from_bytes(value)),
+                // Values
+                TokenType::TK_ANY => Some(from_bytes(value)),
+                TokenType::TK_BLOB => Some(from_bytes(value)),
+                TokenType::TK_INTEGER => Some(from_bytes(value)),
+                TokenType::TK_FLOAT => Some(from_bytes(value)),
+                _ => None,
+            },
+            end,
+        )
+    }
+}
+
+fn from_bytes(bytes: &[u8]) -> String {
+    unsafe { str::from_utf8_unchecked(bytes).to_owned() }
+}
+
+include!(concat!(env!("OUT_DIR"), "/keywords.rs"));
+pub(crate) const MAX_KEYWORD_LEN: usize = 17;
+
+pub fn keyword_token(word: &[u8]) -> Option<TokenType> {
+    KEYWORDS
+        .get(UncasedStr::new(unsafe { str::from_utf8_unchecked(word) }))
+        .cloned()
+}
+
+pub(crate) fn is_identifier(name: &str) -> bool {
+    if name.is_empty() {
+        return false;
+    }
+    let bytes = name.as_bytes();
+    is_identifier_start(bytes[0])
+        && (bytes.len() == 1 || bytes[1..].iter().all(|b| is_identifier_continue(*b)))
+}
+
+pub(crate) fn is_identifier_start(b: u8) -> bool {
+    b.is_ascii_uppercase() || b == b'_' || b.is_ascii_lowercase() || b > b'\x7F'
+}
+
+pub(crate) fn is_identifier_continue(b: u8) -> bool {
+    b == b'$'
+        || b.is_ascii_digit()
+        || b.is_ascii_uppercase()
+        || b == b'_'
+        || b.is_ascii_lowercase()
+        || b > b'\x7F'
+}
+
+// keyword may become an identifier
+// see %fallback in parse.y
+pub(crate) fn from_token(ty: u16, value: Token) -> String {
+    use TokenType::*;
+    if let Some(str) = value.1 {
+        return str;
+    }
+    match ty {
+        x if x == TK_ABORT as u16 => "ABORT".to_owned(),
+        x if x == TK_ACTION as u16 => "ACTION".to_owned(),
+        //x if x == TK_ADD as u16 => "ADD".to_owned(),
+        x if x == TK_AFTER as u16 => "AFTER".to_owned(),
+        //x if x == TK_ALL as u16 => "ALL".to_owned(),
+        //x if x == TK_ALTER as u16 => "ALTER".to_owned(),
+        x if x == TK_ALWAYS as u16 => "ALWAYS".to_owned(),
+        x if x == TK_ANALYZE as u16 => "ANALYZE".to_owned(),
+        //x if x == TK_AND as u16 => "AND".to_owned(),
+        //x if x == TK_AS as u16 => "AS".to_owned(),
+        x if x == TK_ASC as u16 => "ASC".to_owned(),
+        x if x == TK_ATTACH as u16 => "ATTACH".to_owned(),
+        //x if x == TK_AUTOINCR as u16 => "AUTOINCREMENT".to_owned(),
+        x if x == TK_BEFORE as u16 => "BEFORE".to_owned(),
+        x if x == TK_BEGIN as u16 => "BEGIN".to_owned(),
+        //x if x == TK_BETWEEN as u16 => "BETWEEN".to_owned(),
+        x if x == TK_BY as u16 => "BY".to_owned(),
+        x if x == TK_CASCADE
as u16 => "CASCADE".to_owned(), + //x if x == TK_CASE as u16 => "CASE".to_owned(), + x if x == TK_CAST as u16 => "CAST".to_owned(), + //x if x == TK_CHECK as u16 => "CHECK".to_owned(), + //x if x == TK_COLLATE as u16 => "COLLATE".to_owned(), + x if x == TK_COLUMNKW as u16 => "COLUMN".to_owned(), + //x if x == TK_COMMIT as u16 => "COMMIT".to_owned(), + x if x == TK_CONFLICT as u16 => "CONFLICT".to_owned(), + //x if x == TK_CONSTRAINT as u16 => "CONSTRAINT".to_owned(), + //x if x == TK_CREATE as u16 => "CREATE".to_owned(), + x if x == TK_CURRENT as u16 => "CURRENT".to_owned(), + x if x == TK_DATABASE as u16 => "DATABASE".to_owned(), + x if x == TK_DEFAULT as u16 => "DEFAULT".to_owned(), + //x if x == TK_DEFERRABLE as u16 => "DEFERRABLE".to_owned(), + x if x == TK_DEFERRED as u16 => "DEFERRED".to_owned(), + x if x == TK_DELETE as u16 => "DELETE".to_owned(), + x if x == TK_DESC as u16 => "DESC".to_owned(), + x if x == TK_DETACH as u16 => "DETACH".to_owned(), + //x if x == TK_DISTINCT as u16 => "DISTINCT".to_owned(), + x if x == TK_DO as u16 => "DO".to_owned(), + //x if x == TK_DROP as u16 => "DROP".to_owned(), + x if x == TK_EACH as u16 => "EACH".to_owned(), + //x if x == TK_ELSE as u16 => "ELSE".to_owned(), + x if x == TK_END as u16 => "END".to_owned(), + //x if x == TK_ESCAPE as u16 => "ESCAPE".to_owned(), + //x if x == TK_EXCEPT as u16 => "EXCEPT".to_owned(), + x if x == TK_EXCLUDE as u16 => "EXCLUDE".to_owned(), + x if x == TK_EXCLUSIVE as u16 => "EXCLUSIVE".to_owned(), + //x if x == TK_EXISTS as u16 => "EXISTS".to_owned(), + x if x == TK_EXPLAIN as u16 => "EXPLAIN".to_owned(), + x if x == TK_FAIL as u16 => "FAIL".to_owned(), + //x if x == TK_FILTER as u16 => "FILTER".to_owned(), + x if x == TK_FIRST as u16 => "FIRST".to_owned(), + x if x == TK_FOLLOWING as u16 => "FOLLOWING".to_owned(), + x if x == TK_FOR as u16 => "FOR".to_owned(), + //x if x == TK_FOREIGN as u16 => "FOREIGN".to_owned(), + //x if x == TK_FROM as u16 => "FROM".to_owned(), + x if x == TK_GENERATED as u16 => "GENERATED".to_owned(), + //x if x == TK_GROUP as u16 => "GROUP".to_owned(), + x if x == TK_GROUPS as u16 => "GROUPS".to_owned(), + //x if x == TK_HAVING as u16 => "HAVING".to_owned(), + x if x == TK_IF as u16 => "IF".to_owned(), + x if x == TK_IGNORE as u16 => "IGNORE".to_owned(), + x if x == TK_IMMEDIATE as u16 => "IMMEDIATE".to_owned(), + //x if x == TK_IN as u16 => "IN".to_owned(), + //x if x == TK_INDEX as u16 => "INDEX".to_owned(), + x if x == TK_INDEXED as u16 => "INDEXED".to_owned(), + x if x == TK_INITIALLY as u16 => "INITIALLY".to_owned(), + //x if x == TK_INSERT as u16 => "INSERT".to_owned(), + x if x == TK_INSTEAD as u16 => "INSTEAD".to_owned(), + //x if x == TK_INTERSECT as u16 => "INTERSECT".to_owned(), + //x if x == TK_INTO as u16 => "INTO".to_owned(), + //x if x == TK_IS as u16 => "IS".to_owned(), + //x if x == TK_ISNULL as u16 => "ISNULL".to_owned(), + //x if x == TK_JOIN as u16 => "JOIN".to_owned(), + x if x == TK_KEY as u16 => "KEY".to_owned(), + x if x == TK_LAST as u16 => "LAST".to_owned(), + //x if x == TK_LIMIT as u16 => "LIMIT".to_owned(), + x if x == TK_MATCH as u16 => "MATCH".to_owned(), + x if x == TK_MATERIALIZED as u16 => "MATERIALIZED".to_owned(), + x if x == TK_NO as u16 => "NO".to_owned(), + //x if x == TK_NOT as u16 => "NOT".to_owned(), + //x if x == TK_NOTHING as u16 => "NOTHING".to_owned(), + //x if x == TK_NOTNULL as u16 => "NOTNULL".to_owned(), + //x if x == TK_NULL as u16 => "NULL".to_owned(), + x if x == TK_NULLS as u16 => "NULLS".to_owned(), + x if x == TK_OF as u16 => 
"OF".to_owned(), + x if x == TK_OFFSET as u16 => "OFFSET".to_owned(), + x if x == TK_ON as u16 => "ON".to_owned(), + //x if x == TK_OR as u16 => "OR".to_owned(), + //x if x == TK_ORDER as u16 => "ORDER".to_owned(), + x if x == TK_OTHERS as u16 => "OTHERS".to_owned(), + //x if x == TK_OVER as u16 => "OVER".to_owned(), + x if x == TK_PARTITION as u16 => "PARTITION".to_owned(), + x if x == TK_PLAN as u16 => "PLAN".to_owned(), + x if x == TK_PRAGMA as u16 => "PRAGMA".to_owned(), + x if x == TK_PRECEDING as u16 => "PRECEDING".to_owned(), + //x if x == TK_PRIMARY as u16 => "PRIMARY".to_owned(), + x if x == TK_QUERY as u16 => "QUERY".to_owned(), + x if x == TK_RAISE as u16 => "RAISE".to_owned(), + x if x == TK_RANGE as u16 => "RANGE".to_owned(), + x if x == TK_READONLY as u16 => "READONLY".to_owned(), + x if x == TK_RECURSIVE as u16 => "RECURSIVE".to_owned(), + //x if x == TK_REFERENCES as u16 => "REFERENCES".to_owned(), + x if x == TK_REINDEX as u16 => "REINDEX".to_owned(), + x if x == TK_RELEASE as u16 => "RELEASE".to_owned(), + x if x == TK_RENAME as u16 => "RENAME".to_owned(), + x if x == TK_REPLACE as u16 => "REPLACE".to_owned(), + //x if x == TK_RETURNING as u16 => "RETURNING".to_owned(), + x if x == TK_RESTRICT as u16 => "RESTRICT".to_owned(), + x if x == TK_ROLLBACK as u16 => "ROLLBACK".to_owned(), + x if x == TK_ROW as u16 => "ROW".to_owned(), + x if x == TK_ROWS as u16 => "ROWS".to_owned(), + x if x == TK_SAVEPOINT as u16 => "SAVEPOINT".to_owned(), + //x if x == TK_SELECT as u16 => "SELECT".to_owned(), + //x if x == TK_SET as u16 => "SET".to_owned(), + //x if x == TK_TABLE as u16 => "TABLE".to_owned(), + x if x == TK_TEMP as u16 => "TEMP".to_owned(), + //x if x == TK_TEMP as u16 => "TEMPORARY".to_owned(), + //x if x == TK_THEN as u16 => "THEN".to_owned(), + x if x == TK_TIES as u16 => "TIES".to_owned(), + //x if x == TK_TO as u16 => "TO".to_owned(), + //x if x == TK_TRANSACTION as u16 => "TRANSACTION".to_owned(), + x if x == TK_TRIGGER as u16 => "TRIGGER".to_owned(), + x if x == TK_UNBOUNDED as u16 => "UNBOUNDED".to_owned(), + //x if x == TK_UNION as u16 => "UNION".to_owned(), + //x if x == TK_UNIQUE as u16 => "UNIQUE".to_owned(), + //x if x == TK_UPDATE as u16 => "UPDATE".to_owned(), + //x if x == TK_USING as u16 => "USING".to_owned(), + x if x == TK_VACUUM as u16 => "VACUUM".to_owned(), + x if x == TK_VALUES as u16 => "VALUES".to_owned(), + x if x == TK_VIEW as u16 => "VIEW".to_owned(), + x if x == TK_VIRTUAL as u16 => "VIRTUAL".to_owned(), + //x if x == TK_WHEN as u16 => "WHEN".to_owned(), + //x if x == TK_WHERE as u16 => "WHERE".to_owned(), + //x if x == TK_WINDOW as u16 => "WINDOW".to_owned(), + x if x == TK_WITH as u16 => "WITH".to_owned(), + x if x == TK_WITHOUT as u16 => "WITHOUT".to_owned(), + _ => unreachable!(), + } +} + +impl TokenType { + pub const fn as_str(&self) -> Option<&'static str> { + use TokenType::*; + match self { + TK_ABORT => Some("ABORT"), + TK_ACTION => Some("ACTION"), + TK_ADD => Some("ADD"), + TK_AFTER => Some("AFTER"), + TK_ALL => Some("ALL"), + TK_ALTER => Some("ALTER"), + TK_ANALYZE => Some("ANALYZE"), + TK_ALWAYS => Some("ALWAYS"), + TK_AND => Some("AND"), + TK_AS => Some("AS"), + TK_ASC => Some("ASC"), + TK_ATTACH => Some("ATTACH"), + TK_AUTOINCR => Some("AUTOINCREMENT"), + TK_BEFORE => Some("BEFORE"), + TK_BEGIN => Some("BEGIN"), + TK_BETWEEN => Some("BETWEEN"), + TK_BY => Some("BY"), + TK_CASCADE => Some("CASCADE"), + TK_CASE => Some("CASE"), + TK_CAST => Some("CAST"), + TK_CHECK => Some("CHECK"), + TK_COLLATE => Some("COLLATE"), + TK_COLUMNKW => 
Some("COLUMN"), + TK_COMMIT => Some("COMMIT"), + TK_CONFLICT => Some("CONFLICT"), + TK_CONSTRAINT => Some("CONSTRAINT"), + TK_CREATE => Some("CREATE"), + TK_CURRENT => Some("CURRENT"), + TK_DATABASE => Some("DATABASE"), + TK_DEFAULT => Some("DEFAULT"), + TK_DEFERRABLE => Some("DEFERRABLE"), + TK_DEFERRED => Some("DEFERRED"), + TK_DELETE => Some("DELETE"), + TK_DESC => Some("DESC"), + TK_DETACH => Some("DETACH"), + TK_DISTINCT => Some("DISTINCT"), + TK_DO => Some("DO"), + TK_DROP => Some("DROP"), + TK_EACH => Some("EACH"), + TK_ELSE => Some("ELSE"), + TK_END => Some("END"), + TK_ESCAPE => Some("ESCAPE"), + TK_EXCEPT => Some("EXCEPT"), + TK_EXCLUDE => Some("EXCLUDE"), + TK_EXCLUSIVE => Some("EXCLUSIVE"), + TK_EXISTS => Some("EXISTS"), + TK_EXPLAIN => Some("EXPLAIN"), + TK_FAIL => Some("FAIL"), + TK_FILTER => Some("FILTER"), + TK_FIRST => Some("FIRST"), + TK_FOLLOWING => Some("FOLLOWING"), + TK_FOR => Some("FOR"), + TK_FOREIGN => Some("FOREIGN"), + TK_FROM => Some("FROM"), + TK_GENERATED => Some("GENERATED"), + TK_GROUP => Some("GROUP"), + TK_GROUPS => Some("GROUPS"), + TK_HAVING => Some("HAVING"), + TK_IF => Some("IF"), + TK_IGNORE => Some("IGNORE"), + TK_IMMEDIATE => Some("IMMEDIATE"), + TK_IN => Some("IN"), + TK_INDEX => Some("INDEX"), + TK_INDEXED => Some("INDEXED"), + TK_INITIALLY => Some("INITIALLY"), + TK_INSERT => Some("INSERT"), + TK_INSTEAD => Some("INSTEAD"), + TK_INTERSECT => Some("INTERSECT"), + TK_INTO => Some("INTO"), + TK_IS => Some("IS"), + TK_ISNULL => Some("ISNULL"), + TK_JOIN => Some("JOIN"), + TK_KEY => Some("KEY"), + TK_LAST => Some("LAST"), + TK_LIMIT => Some("LIMIT"), + TK_MATCH => Some("MATCH"), + TK_MATERIALIZED => Some("MATERIALIZED"), + TK_NO => Some("NO"), + TK_NOT => Some("NOT"), + TK_NOTHING => Some("NOTHING"), + TK_NOTNULL => Some("NOTNULL"), + TK_NULL => Some("NULL"), + TK_NULLS => Some("NULLS"), + TK_OF => Some("OF"), + TK_OFFSET => Some("OFFSET"), + TK_ON => Some("ON"), + TK_OR => Some("OR"), + TK_ORDER => Some("ORDER"), + TK_OTHERS => Some("OTHERS"), + TK_OVER => Some("OVER"), + TK_PARTITION => Some("PARTITION"), + TK_PLAN => Some("PLAN"), + TK_PRAGMA => Some("PRAGMA"), + TK_PRECEDING => Some("PRECEDING"), + TK_PRIMARY => Some("PRIMARY"), + TK_QUERY => Some("QUERY"), + TK_RAISE => Some("RAISE"), + TK_RANGE => Some("RANGE"), + TK_RECURSIVE => Some("RECURSIVE"), + TK_REFERENCES => Some("REFERENCES"), + TK_REINDEX => Some("REINDEX"), + TK_RELEASE => Some("RELEASE"), + TK_RENAME => Some("RENAME"), + TK_REPLACE => Some("REPLACE"), + TK_RETURNING => Some("RETURNING"), + TK_RESTRICT => Some("RESTRICT"), + TK_ROLLBACK => Some("ROLLBACK"), + TK_ROW => Some("ROW"), + TK_ROWS => Some("ROWS"), + TK_SAVEPOINT => Some("SAVEPOINT"), + TK_SELECT => Some("SELECT"), + TK_SET => Some("SET"), + TK_TABLE => Some("TABLE"), + TK_TEMP => Some("TEMP"), // or TEMPORARY + TK_TIES => Some("TIES"), + TK_THEN => Some("THEN"), + TK_TO => Some("TO"), + TK_TRANSACTION => Some("TRANSACTION"), + TK_TRIGGER => Some("TRIGGER"), + TK_UNBOUNDED => Some("UNBOUNDED"), + TK_UNION => Some("UNION"), + TK_UNIQUE => Some("UNIQUE"), + TK_UPDATE => Some("UPDATE"), + TK_USING => Some("USING"), + TK_VACUUM => Some("VACUUM"), + TK_VALUES => Some("VALUES"), + TK_VIEW => Some("VIEW"), + TK_VIRTUAL => Some("VIRTUAL"), + TK_WHEN => Some("WHEN"), + TK_WHERE => Some("WHERE"), + TK_WINDOW => Some("WINDOW"), + TK_WITH => Some("WITH"), + TK_WITHOUT => Some("WITHOUT"), + TK_BITAND => Some("&"), + TK_BITNOT => Some("~"), + TK_BITOR => Some("|"), + TK_COMMA => Some(","), + TK_CONCAT => Some("||"), + TK_DOT => 
Some("."), + TK_EQ => Some("="), // or == + TK_GT => Some(">"), + TK_GE => Some(">="), + TK_LP => Some("("), + TK_LSHIFT => Some("<<"), + TK_LE => Some("<="), + TK_LT => Some("<"), + TK_MINUS => Some("-"), + TK_NE => Some("<>"), // or != + TK_PLUS => Some("+"), + TK_REM => Some("%"), + TK_RP => Some(")"), + TK_RSHIFT => Some(">>"), + TK_SEMI => Some(";"), + TK_SLASH => Some("/"), + TK_STAR => Some("*"), + TK_READONLY => Some("READONLY"), + _ => None, + } + } +} diff --git a/vendored/sqlite3-parser/src/dialect/token.rs b/vendored/sqlite3-parser/src/dialect/token.rs new file mode 100644 index 0000000000..dbb0055cb5 --- /dev/null +++ b/vendored/sqlite3-parser/src/dialect/token.rs @@ -0,0 +1,180 @@ +//! All terminal symbols. + +/// Token classes +// Generated by lemon (parse.h). +// Renamed manually. +// To be keep in sync. +#[non_exhaustive] +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)] +#[repr(u16)] +pub enum TokenType { + TK_EOF = 0, + TK_SEMI = 1, + TK_EXPLAIN = 2, + TK_QUERY = 3, + TK_PLAN = 4, + TK_BEGIN = 5, + TK_TRANSACTION = 6, + TK_DEFERRED = 7, + TK_IMMEDIATE = 8, + TK_EXCLUSIVE = 9, + TK_READONLY = 10, + TK_COMMIT = 11, + TK_END = 12, + TK_ROLLBACK = 13, + TK_SAVEPOINT = 14, + TK_RELEASE = 15, + TK_TO = 16, + TK_TABLE = 17, + TK_CREATE = 18, + TK_IF = 19, + TK_NOT = 20, + TK_EXISTS = 21, + TK_TEMP = 22, + TK_LP = 23, + TK_RP = 24, + TK_AS = 25, + TK_COMMA = 26, + TK_WITHOUT = 27, + TK_ABORT = 28, + TK_ACTION = 29, + TK_AFTER = 30, + TK_ANALYZE = 31, + TK_ASC = 32, + TK_ATTACH = 33, + TK_BEFORE = 34, + TK_BY = 35, + TK_CASCADE = 36, + TK_CAST = 37, + TK_CONFLICT = 38, + TK_DATABASE = 39, + TK_DESC = 40, + TK_DETACH = 41, + TK_EACH = 42, + TK_FAIL = 43, + TK_OR = 44, + TK_AND = 45, + TK_IS = 46, + TK_MATCH = 47, + TK_LIKE_KW = 48, + TK_BETWEEN = 49, + TK_IN = 50, + TK_ISNULL = 51, + TK_NOTNULL = 52, + TK_NE = 53, + TK_EQ = 54, + TK_GT = 55, + TK_LE = 56, + TK_LT = 57, + TK_GE = 58, + TK_ESCAPE = 59, + TK_ID = 60, + TK_COLUMNKW = 61, + TK_DO = 62, + TK_FOR = 63, + TK_IGNORE = 64, + TK_INITIALLY = 65, + TK_INSTEAD = 66, + TK_NO = 67, + TK_KEY = 68, + TK_OF = 69, + TK_OFFSET = 70, + TK_PRAGMA = 71, + TK_RAISE = 72, + TK_RECURSIVE = 73, + TK_REPLACE = 74, + TK_RESTRICT = 75, + TK_ROW = 76, + TK_ROWS = 77, + TK_TRIGGER = 78, + TK_VACUUM = 79, + TK_VIEW = 80, + TK_VIRTUAL = 81, + TK_WITH = 82, + TK_NULLS = 83, + TK_FIRST = 84, + TK_LAST = 85, + TK_CURRENT = 86, + TK_FOLLOWING = 87, + TK_PARTITION = 88, + TK_PRECEDING = 89, + TK_RANGE = 90, + TK_UNBOUNDED = 91, + TK_EXCLUDE = 92, + TK_GROUPS = 93, + TK_OTHERS = 94, + TK_TIES = 95, + TK_GENERATED = 96, + TK_ALWAYS = 97, + TK_MATERIALIZED = 98, + TK_REINDEX = 99, + TK_RENAME = 100, + TK_CTIME_KW = 101, + TK_ANY = 102, + TK_BITAND = 103, + TK_BITOR = 104, + TK_LSHIFT = 105, + TK_RSHIFT = 106, + TK_PLUS = 107, + TK_MINUS = 108, + TK_STAR = 109, + TK_SLASH = 110, + TK_REM = 111, + TK_CONCAT = 112, + TK_PTR = 113, + TK_COLLATE = 114, + TK_BITNOT = 115, + TK_ON = 116, + TK_INDEXED = 117, + TK_STRING = 118, + TK_JOIN_KW = 119, + TK_CONSTRAINT = 120, + TK_DEFAULT = 121, + TK_NULL = 122, + TK_PRIMARY = 123, + TK_UNIQUE = 124, + TK_CHECK = 125, + TK_REFERENCES = 126, + TK_AUTOINCR = 127, + TK_INSERT = 128, + TK_DELETE = 129, + TK_UPDATE = 130, + TK_SET = 131, + TK_DEFERRABLE = 132, + TK_FOREIGN = 133, + TK_DROP = 134, + TK_UNION = 135, + TK_ALL = 136, + TK_EXCEPT = 137, + TK_INTERSECT = 138, + TK_SELECT = 139, + TK_VALUES = 140, + TK_DISTINCT = 141, + TK_DOT = 142, + TK_FROM = 143, + TK_JOIN = 144, + 
+    TK_USING = 145,
+    TK_ORDER = 146,
+    TK_GROUP = 147,
+    TK_HAVING = 148,
+    TK_LIMIT = 149,
+    TK_WHERE = 150,
+    TK_RETURNING = 151,
+    TK_INTO = 152,
+    TK_NOTHING = 153,
+    TK_BLOB = 154,
+    TK_FLOAT = 155,
+    TK_INTEGER = 156,
+    TK_VARIABLE = 157,
+    TK_CASE = 158,
+    TK_WHEN = 159,
+    TK_THEN = 160,
+    TK_ELSE = 161,
+    TK_INDEX = 162,
+    TK_ALTER = 163,
+    TK_ADD = 164,
+    TK_WINDOW = 165,
+    TK_OVER = 166,
+    TK_FILTER = 167,
+}
diff --git a/vendored/sqlite3-parser/src/lexer/README.md b/vendored/sqlite3-parser/src/lexer/README.md
new file mode 100644
index 0000000000..85301f11c3
--- /dev/null
+++ b/vendored/sqlite3-parser/src/lexer/README.md
@@ -0,0 +1,15 @@
+Streaming/Lazy/No-copy scanner.
+
+I tried [FallibleStreamingIterator](https://docs.rs/fallible-streaming-iterator/0.1.5/fallible_streaming_iterator/trait.FallibleStreamingIterator.html) but failed due to some errors reported by the borrow checker.
+But our `Scanner` is a `FallibleStreamingIterator`:
+> `FallibleStreamingIterator` differs from the standard library's `Iterator` trait in two ways: iteration can fail, resulting in an error, and only one element of the iteration is available at any time.
+> While these iterators cannot be used with Rust `for` loops, `while let` loops offer a similar level of ergonomics.
+
+Currently, there is one `unsafe` block in the `scan` method, used to bypass the borrow checker.
+I don't know if it can be replaced with safe code.
+But I am quite confident that it is safe.
+
+One concrete scanner is implemented:
+ - SQL lexer based on SQLite [tokenizer](http://www.sqlite.org/src/artifact?ci=trunk&filename=src/tokenize.c).
+
+[Bytes](https://doc.rust-lang.org/std/io/struct.Bytes.html) cannot be used because we don't want to copy token bytes twice.
\ No newline at end of file
diff --git a/vendored/sqlite3-parser/src/lexer/mod.rs b/vendored/sqlite3-parser/src/lexer/mod.rs
new file mode 100644
index 0000000000..953ff76a7e
--- /dev/null
+++ b/vendored/sqlite3-parser/src/lexer/mod.rs
@@ -0,0 +1,6 @@
+//! Streaming SQLite tokenizer
+
+mod scan;
+pub mod sql;
+
+pub use scan::{ScanError, Scanner, Splitter};
diff --git a/vendored/sqlite3-parser/src/lexer/scan.rs b/vendored/sqlite3-parser/src/lexer/scan.rs
new file mode 100644
index 0000000000..7b280df9e9
--- /dev/null
+++ b/vendored/sqlite3-parser/src/lexer/scan.rs
@@ -0,0 +1,166 @@
+//! Adaptation/port of [Go scanner](http://tip.golang.org/pkg/bufio/#Scanner).
+
+use log::debug;
+
+use std::error::Error;
+use std::fmt;
+use std::io;
+
+pub trait ScanError: Error + From<io::Error> + Sized {
+    fn position(&mut self, line: u64, column: usize);
+}
+
+/// The `(&[u8], TokenType)` is the token.
+/// And the `usize` is the amount of bytes to consume.
+type SplitResult<'input, TokenType, Error> =
+    Result<(Option<(&'input [u8], TokenType)>, usize), Error>;
+
+/// Split function used to tokenize the input
+pub trait Splitter: Sized {
+    type Error: ScanError;
+    //type Item: ?Sized;
+    type TokenType;
+
+    /// The arguments are an initial substring of the remaining unprocessed
+    /// data.
+    ///
+    /// If the returned value is an error, scanning stops and the error
+    /// is returned to the client.
+    ///
+    /// The function is never called with an empty data slice.
+    fn split<'input>(
+        &mut self,
+        data: &'input [u8],
+    ) -> SplitResult<'input, Self::TokenType, Self::Error>;
+}
+
+/// Like a `BufReader` but with a growable buffer.
+/// Successive calls to the `scan` method will step through the 'tokens'
+/// of a file, skipping the bytes between the tokens.
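+///
+/// A sketch of a typical driver loop (`MySplitter` is a hypothetical
+/// [`Splitter`] implementation, not part of this crate):
+///
+/// ```ignore
+/// let mut scanner = Scanner::new(MySplitter::new());
+/// while let (start, Some((token, token_type)), end) = scanner.scan(input)? {
+///     // `token` borrows from `input`: use it before the next call to `scan`.
+///     println!("{:?} at {}..{}", token_type, start, end);
+/// }
+/// ```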
+///
+/// Scanning stops unrecoverably at EOF, the first I/O error, or a token too
+/// large to fit in the buffer. When a scan stops, the reader may have
+/// advanced arbitrarily far past the last token.
+pub struct Scanner<S: Splitter> {
+    /// offset in `input`
+    offset: usize,
+    /// mark
+    mark: (usize, u64, usize),
+    /// The function to tokenize the input.
+    splitter: S,
+    /// current line number
+    line: u64,
+    /// current column number (byte offset, not char offset)
+    column: usize,
+}
+
+impl<S: Splitter> Scanner<S> {
+    pub fn new(splitter: S) -> Scanner<S> {
+        Scanner {
+            offset: 0,
+            mark: (0, 0, 0),
+            splitter,
+            line: 1,
+            column: 1,
+        }
+    }
+
+    /// Current line number
+    pub fn line(&self) -> u64 {
+        self.line
+    }
+
+    /// Current column number (byte offset, not char offset)
+    pub fn column(&self) -> usize {
+        self.column
+    }
+
+    pub fn splitter(&self) -> &S {
+        &self.splitter
+    }
+
+    pub fn mark(&mut self) {
+        self.mark = (self.offset, self.line, self.column);
+    }
+    pub fn reset_to_mark(&mut self) {
+        (self.offset, self.line, self.column) = self.mark;
+    }
+
+    /// Reset the scanner such that it behaves as if it had never been used.
+    pub fn reset(&mut self) {
+        self.offset = 0;
+        self.line = 1;
+        self.column = 1;
+    }
+}
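+
+// `mark`/`reset_to_mark` give one level of backtracking. The SQL lexer uses
+// them for the WINDOW/OVER/FILTER lookahead in `lexer/sql/mod.rs`, roughly
+// (a sketch, not part of the original source):
+//
+//     scanner.mark();
+//     let ahead = scanner.scan(input)?; // peek at the next token
+//     scanner.reset_to_mark();          // rewind to the marked position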
+
+type ScanResult<'input, TokenType, Error> =
+    Result<(usize, Option<(&'input [u8], TokenType)>, usize), Error>;
+
+impl<S: Splitter> Scanner<S> {
+    /// Advance the Scanner to the next token.
+    /// Return the token as a byte slice.
+    /// Return `None` when the end of the input is reached.
+    /// Return any error that occurs while reading the input.
+    pub fn scan<'input>(
+        &mut self,
+        input: &'input [u8],
+    ) -> ScanResult<'input, S::TokenType, S::Error> {
+        debug!(target: "scanner", "scan(line: {}, column: {})", self.line, self.column);
+        // Loop until we have a token.
+        loop {
+            // See if we can get a token with what we already have.
+            if self.offset < input.len() {
+                let data = &input[self.offset..];
+                match self.splitter.split(data) {
+                    Err(mut e) => {
+                        e.position(self.line, self.column);
+                        return Err(e);
+                    }
+                    Ok((None, 0)) => {
+                        // Done
+                    }
+                    Ok((None, amt)) => {
+                        // Ignore/skip this data
+                        self.consume(data, amt);
+                        continue;
+                    }
+                    Ok((tok, amt)) => {
+                        let start = self.offset;
+                        self.consume(data, amt);
+                        return Ok((start, tok, self.offset));
+                    }
+                }
+            }
+            // We cannot generate a token with what we are holding.
+            // we are done.
+            return Ok((self.offset, None, self.offset));
+        }
+    }
+
+    /// Consume `amt` bytes of the buffer.
+    fn consume(&mut self, data: &[u8], amt: usize) {
+        debug!(target: "scanner", "consume({})", amt);
+        debug_assert!(amt <= data.len());
+        for byte in &data[..amt] {
+            if *byte == b'\n' {
+                self.line += 1;
+                self.column = 1;
+            } else {
+                self.column += 1;
+            }
+        }
+        self.offset += amt;
+    }
+}
+
+impl<S: Splitter> fmt::Debug for Scanner<S> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Scanner")
+            .field("offset", &self.offset)
+            .field("mark", &self.mark)
+            .field("line", &self.line)
+            .field("column", &self.column)
+            .finish()
+    }
+}
diff --git a/vendored/sqlite3-parser/src/lexer/sql/error.rs b/vendored/sqlite3-parser/src/lexer/sql/error.rs
new file mode 100644
index 0000000000..59bc682844
--- /dev/null
+++ b/vendored/sqlite3-parser/src/lexer/sql/error.rs
@@ -0,0 +1,83 @@
+use std::error;
+use std::fmt;
+use std::io;
+
+use crate::lexer::scan::ScanError;
+use crate::parser::ParserError;
+
+#[non_exhaustive]
+#[derive(Debug)]
+pub enum Error {
+    /// I/O Error
+    Io(io::Error),
+    UnrecognizedToken(Option<(u64, usize)>),
+    UnterminatedLiteral(Option<(u64, usize)>),
+    UnterminatedBracket(Option<(u64, usize)>),
+    UnterminatedBlockComment(Option<(u64, usize)>),
+    BadVariableName(Option<(u64, usize)>),
+    BadNumber(Option<(u64, usize)>),
+    ExpectedEqualsSign(Option<(u64, usize)>),
+    MalformedBlobLiteral(Option<(u64, usize)>),
+    MalformedHexInteger(Option<(u64, usize)>),
+    ParserError(ParserError, Option<(u64, usize)>),
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match *self {
+            Error::Io(ref err) => err.fmt(f),
+            Error::UnrecognizedToken(pos) => write!(f, "unrecognized token at {:?}", pos.unwrap()),
+            Error::UnterminatedLiteral(pos) => {
+                write!(f, "non-terminated literal at {:?}", pos.unwrap())
+            }
+            Error::UnterminatedBracket(pos) => {
+                write!(f, "non-terminated bracket at {:?}", pos.unwrap())
+            }
+            Error::UnterminatedBlockComment(pos) => {
+                write!(f, "non-terminated block comment at {:?}", pos.unwrap())
+            }
+            Error::BadVariableName(pos) => write!(f, "bad variable name at {:?}", pos.unwrap()),
+            Error::BadNumber(pos) => write!(f, "bad number at {:?}", pos.unwrap()),
+            Error::ExpectedEqualsSign(pos) => write!(f, "expected = sign at {:?}", pos.unwrap()),
+            Error::MalformedBlobLiteral(pos) => {
+                write!(f, "malformed blob literal at {:?}", pos.unwrap())
+            }
+            Error::MalformedHexInteger(pos) => {
+                write!(f, "malformed hex integer at {:?}", pos.unwrap())
+            }
+            Error::ParserError(ref msg, pos) => write!(f, "{} at {:?}", msg, pos.unwrap()),
+        }
+    }
+}
+
+impl error::Error for Error {}
+
+impl From<io::Error> for Error {
+    fn from(err: io::Error) -> Error {
+        Error::Io(err)
+    }
+}
+
+impl From<ParserError> for Error {
+    fn from(err: ParserError) -> Error {
+        Error::ParserError(err, None)
+    }
+}
+
+impl ScanError for Error {
+    fn position(&mut self, line: u64, column: usize) {
+        match *self {
+            Error::Io(_) => {}
+            Error::UnrecognizedToken(ref mut pos) => *pos = Some((line, column)),
+            Error::UnterminatedLiteral(ref mut pos) => *pos = Some((line, column)),
+            Error::UnterminatedBracket(ref mut pos) => *pos = Some((line, column)),
+            Error::UnterminatedBlockComment(ref mut pos) => *pos = Some((line, column)),
+            Error::BadVariableName(ref mut pos) => *pos = Some((line, column)),
+            Error::BadNumber(ref mut pos) => *pos = Some((line, column)),
+            Error::ExpectedEqualsSign(ref mut pos) => *pos = Some((line, column)),
+            Error::MalformedBlobLiteral(ref mut pos) => *pos = Some((line, column)),
+            Error::MalformedHexInteger(ref mut pos) => *pos = Some((line, column)),
+            Error::ParserError(_, ref mut pos) => *pos = Some((line, column)),
+        }
+    }
+}
diff --git a/vendored/sqlite3-parser/src/lexer/sql/mod.rs b/vendored/sqlite3-parser/src/lexer/sql/mod.rs
new file mode 100644
index 0000000000..906d7e934c
--- /dev/null
+++ b/vendored/sqlite3-parser/src/lexer/sql/mod.rs
@@ -0,0 +1,644 @@
+//! Adaptation/port of [`SQLite` tokenizer](http://www.sqlite.org/src/artifact?ci=trunk&filename=src/tokenize.c)
+use fallible_iterator::FallibleIterator;
+use memchr::memchr;
+
+pub use crate::dialect::TokenType;
+use crate::dialect::TokenType::*;
+use crate::dialect::{
+    is_identifier_continue, is_identifier_start, keyword_token, sentinel, MAX_KEYWORD_LEN,
+};
+use crate::parser::ast::Cmd;
+use crate::parser::parse::{yyParser, YYCODETYPE};
+use crate::parser::Context;
+
+mod error;
+#[cfg(test)]
+mod test;
+
+use crate::lexer::scan::ScanError;
+use crate::lexer::scan::Splitter;
+use crate::lexer::Scanner;
+pub use crate::parser::ParserError;
+pub use error::Error;
+
+// TODO Extract scanning stuff and move this into the parser crate
+// to make it possible to use the tokenizer without depending on the parser...
+
+pub struct Parser<'input> {
+    input: &'input [u8],
+    scanner: Scanner<Tokenizer>,
+    parser: yyParser<'input>,
+}
+
+impl<'input> Parser<'input> {
+    pub fn new(input: &'input [u8]) -> Parser<'input> {
+        let lexer = Tokenizer::new();
+        let scanner = Scanner::new(lexer);
+        let ctx = Context::new(input);
+        let parser = yyParser::new(ctx);
+        Parser {
+            input,
+            scanner,
+            parser,
+        }
+    }
+
+    pub fn reset(&mut self, input: &'input [u8]) {
+        self.input = input;
+        self.scanner.reset();
+    }
+
+    pub fn line(&self) -> u64 {
+        self.scanner.line()
+    }
+    pub fn column(&self) -> usize {
+        self.scanner.column()
+    }
+}
+
+/*
+ ** Return the id of the next token in input.
+ */
+fn get_token(scanner: &mut Scanner<Tokenizer>, input: &[u8]) -> Result<TokenType, Error> {
+    let mut t = {
+        let (_, token_type) = match scanner.scan(input)? {
+            (_, None, _) => {
+                return Ok(TK_EOF);
+            }
+            (_, Some(tuple), _) => tuple,
+        };
+        token_type
+    };
+    if t == TK_ID
+        || t == TK_STRING
+        || t == TK_JOIN_KW
+        || t == TK_WINDOW
+        || t == TK_OVER
+        || yyParser::parse_fallback(t as YYCODETYPE) == TK_ID as YYCODETYPE
+    {
+        t = TK_ID;
+    }
+    Ok(t)
+}
+
+/*
+ ** The following three functions are called immediately after the tokenizer
+ ** reads the keywords WINDOW, OVER and FILTER, respectively, to determine
+ ** whether the token should be treated as a keyword or an SQL identifier.
+ ** This cannot be handled by the usual lemon %fallback method, due to
+ ** the ambiguity in some constructions. e.g.
+ **
+ **   SELECT sum(x) OVER ...
+ **
+ ** In the above, "OVER" might be a keyword, or it might be an alias for the
+ ** sum(x) expression. If a "%fallback ID OVER" directive were added to the
+ ** grammar, then SQLite would always treat "OVER" as an alias, making it
+ ** impossible to call a window-function without a FILTER clause.
+ **
+ ** WINDOW is treated as a keyword if:
+ **
+ **   * the following token is an identifier, or a keyword that can fallback
+ **     to being an identifier, and
+ **   * the token after that one is TK_AS.
+ **
+ ** OVER is a keyword if:
+ **
+ **   * the previous token was TK_RP, and
+ **   * the next token is either TK_LP or an identifier.
+ **
+ ** FILTER is a keyword if:
+ **
+ **   * the previous token was TK_RP, and
+ **   * the next token is TK_LP.
+ */
+fn analyze_window_keyword(
+    scanner: &mut Scanner<Tokenizer>,
+    input: &[u8],
+) -> Result<TokenType, Error> {
+    let t = get_token(scanner, input)?;
+    if t != TK_ID {
+        return Ok(TK_ID);
+    };
+    let t = get_token(scanner, input)?;
+    if t != TK_AS {
+        return Ok(TK_ID);
+    };
+    Ok(TK_WINDOW)
+}
+fn analyze_over_keyword(
+    scanner: &mut Scanner<Tokenizer>,
+    input: &[u8],
+    last_token: TokenType,
+) -> Result<TokenType, Error> {
+    if last_token == TK_RP {
+        let t = get_token(scanner, input)?;
+        if t == TK_LP || t == TK_ID {
+            return Ok(TK_OVER);
+        }
+    }
+    Ok(TK_ID)
+}
+fn analyze_filter_keyword(
+    scanner: &mut Scanner<Tokenizer>,
+    input: &[u8],
+    last_token: TokenType,
+) -> Result<TokenType, Error> {
+    if last_token == TK_RP && get_token(scanner, input)? == TK_LP {
+        return Ok(TK_FILTER);
+    }
+    Ok(TK_ID)
+}
+
+macro_rules! try_with_position {
+    ($scanner:expr, $expr:expr) => {
+        match $expr {
+            Ok(val) => val,
+            Err(err) => {
+                let mut err = Error::from(err);
+                err.position($scanner.line(), $scanner.column());
+                return Err(err);
+            }
+        }
+    };
+}
+
+impl<'input> FallibleIterator for Parser<'input> {
+    type Item = Cmd;
+    type Error = Error;
+
+    fn next(&mut self) -> Result<Option<Cmd>, Error> {
+        //print!("line: {}, column: {}: ", self.scanner.line(), self.scanner.column());
+        self.parser.ctx.reset();
+        let mut last_token_parsed = TK_EOF;
+        let mut eof = false;
+        loop {
+            let (start, (value, mut token_type), end) = match self.scanner.scan(self.input)? {
+                (_, None, _) => {
+                    eof = true;
+                    break;
+                }
+                (start, Some(tuple), end) => (start, tuple, end),
+            };
+            let token = if token_type >= TK_WINDOW {
+                debug_assert!(
+                    token_type == TK_OVER || token_type == TK_FILTER || token_type == TK_WINDOW
+                );
+                self.scanner.mark();
+                if token_type == TK_WINDOW {
+                    token_type = analyze_window_keyword(&mut self.scanner, self.input)?;
+                } else if token_type == TK_OVER {
+                    token_type =
+                        analyze_over_keyword(&mut self.scanner, self.input, last_token_parsed)?;
+                } else if token_type == TK_FILTER {
+                    token_type =
+                        analyze_filter_keyword(&mut self.scanner, self.input, last_token_parsed)?;
+                }
+                self.scanner.reset_to_mark();
+                token_type.to_token(start, value, end)
+            } else {
+                token_type.to_token(start, value, end)
+            };
+            //println!("({:?}, {:?})", token_type, token);
+            try_with_position!(self.scanner, self.parser.sqlite3Parser(token_type, token));
+            last_token_parsed = token_type;
+            if self.parser.ctx.done() {
+                //println!();
+                break;
+            }
+        }
+        if last_token_parsed == TK_EOF {
+            return Ok(None); // empty input
+        }
+        /* Upon reaching the end of input, call the parser two more times
+        with tokens TK_SEMI and 0, in that order. */
+        if eof && self.parser.ctx.is_ok() {
+            if last_token_parsed != TK_SEMI {
+                try_with_position!(
+                    self.scanner,
+                    self.parser
+                        .sqlite3Parser(TK_SEMI, sentinel(self.input.len()))
+                );
+            }
+            try_with_position!(
+                self.scanner,
+                self.parser
+                    .sqlite3Parser(TK_EOF, sentinel(self.input.len()))
+            );
+        }
+        self.parser.sqlite3ParserFinalize();
+        if let Some(e) = self.parser.ctx.error() {
+            let err = Error::ParserError(e, Some((self.scanner.line(), self.scanner.column())));
+            return Err(err);
+        }
+        let cmd = self.parser.ctx.cmd();
+        Ok(cmd)
+    }
+}
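+
+// Example (a sketch): statements are pulled through the `FallibleIterator`
+// implementation above, one `Cmd` per statement (see `examples/sql_cmds.rs`
+// and the tests in `test.rs` for the same pattern):
+//
+//     use fallible_iterator::FallibleIterator;
+//     let mut parser = Parser::new(b"SELECT 1; SELECT 2;");
+//     while let Some(cmd) = parser.next()? {
+//         println!("{}", cmd);
+//     }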
+
+pub type Token<'input> = (&'input [u8], TokenType);
+
+#[derive(Default)]
+pub struct Tokenizer {}
+
+impl Tokenizer {
+    pub fn new() -> Tokenizer {
+        Tokenizer {}
+    }
+}
+
+/// ```compile_fail
+/// use sqlite3_parser::lexer::sql::Tokenizer;
+/// use sqlite3_parser::lexer::Scanner;
+///
+/// let tokenizer = Tokenizer::new();
+/// let input = "PRAGMA parser_trace=ON;".as_bytes();
+/// let mut s = Scanner::new(input, tokenizer);
+/// let (token1, _) = s.scan().unwrap().unwrap();
+/// s.scan().unwrap().unwrap();
+/// assert!(b"PRAGMA".eq_ignore_ascii_case(token1));
+/// ```
+impl Splitter for Tokenizer {
+    type Error = Error;
+    type TokenType = TokenType;
+
+    fn split<'input>(
+        &mut self,
+        data: &'input [u8],
+    ) -> Result<(Option<Token<'input>>, usize), Error> {
+        if data[0].is_ascii_whitespace() {
+            // eat as much space as possible
+            return Ok((
+                None,
+                match data.iter().skip(1).position(|&b| !b.is_ascii_whitespace()) {
+                    Some(i) => i + 1,
+                    _ => data.len(),
+                },
+            ));
+        }
+        return match data[0] {
+            b'-' => {
+                if let Some(b) = data.get(1) {
+                    if *b == b'-' {
+                        // eat comment
+                        if let Some(i) = memchr(b'\n', data) {
+                            Ok((None, i + 1))
+                        } else {
+                            Ok((None, data.len()))
+                        }
+                    } else if *b == b'>' {
+                        if let Some(b) = data.get(2) {
+                            if *b == b'>' {
+                                return Ok((Some((&data[..3], TK_PTR)), 3));
+                            }
+                        }
+                        Ok((Some((&data[..2], TK_PTR)), 2))
+                    } else {
+                        Ok((Some((&data[..1], TK_MINUS)), 1))
+                    }
+                } else {
+                    Ok((Some((&data[..1], TK_MINUS)), 1))
+                }
+            }
+            b'(' => Ok((Some((&data[..1], TK_LP)), 1)),
+            b')' => Ok((Some((&data[..1], TK_RP)), 1)),
+            b';' => Ok((Some((&data[..1], TK_SEMI)), 1)),
+            b'+' => Ok((Some((&data[..1], TK_PLUS)), 1)),
+            b'*' => Ok((Some((&data[..1], TK_STAR)), 1)),
+            b'/' => {
+                if let Some(b) = data.get(1) {
+                    if *b == b'*' {
+                        // eat comment
+                        let mut pb = 0;
+                        let mut end = None;
+                        for (i, b) in data.iter().enumerate().skip(2) {
+                            if *b == b'/' && pb == b'*' {
+                                end = Some(i);
+                                break;
+                            }
+                            pb = *b;
+                        }
+                        if let Some(i) = end {
+                            Ok((None, i + 1))
+                        } else {
+                            Err(Error::UnterminatedBlockComment(None))
+                        }
+                    } else {
+                        Ok((Some((&data[..1], TK_SLASH)), 1))
+                    }
+                } else {
+                    Ok((Some((&data[..1], TK_SLASH)), 1))
+                }
+            }
+            b'%' => Ok((Some((&data[..1], TK_REM)), 1)),
+            b'=' => {
+                if let Some(b) = data.get(1) {
+                    Ok(if *b == b'=' {
+                        (Some((&data[..2], TK_EQ)), 2)
+                    } else {
+                        (Some((&data[..1], TK_EQ)), 1)
+                    })
+                } else {
+                    Ok((Some((&data[..1], TK_EQ)), 1))
+                }
+            }
+            b'<' => {
+                if let Some(b) = data.get(1) {
+                    Ok(match *b {
+                        b'=' => (Some((&data[..2], TK_LE)), 2),
+                        b'>' => (Some((&data[..2], TK_NE)), 2),
+                        b'<' => (Some((&data[..2], TK_LSHIFT)), 2),
+                        _ => (Some((&data[..1], TK_LT)), 1),
+                    })
+                } else {
+                    Ok((Some((&data[..1], TK_LT)), 1))
+                }
+            }
+            b'>' => {
+                if let Some(b) = data.get(1) {
+                    Ok(match *b {
+                        b'=' => (Some((&data[..2], TK_GE)), 2),
+                        b'>' => (Some((&data[..2], TK_RSHIFT)), 2),
+                        _ => (Some((&data[..1], TK_GT)), 1),
+                    })
+                } else {
+                    Ok((Some((&data[..1], TK_GT)), 1))
+                }
+            }
+            b'!' => {
+                if let Some(b) = data.get(1) {
+                    if *b == b'=' {
+                        Ok((Some((&data[..2], TK_NE)), 2))
+                    } else {
+                        Err(Error::ExpectedEqualsSign(None))
+                    }
+                } else {
+                    Err(Error::ExpectedEqualsSign(None))
+                }
+            }
+            b'|' => {
+                if let Some(b) = data.get(1) {
+                    Ok(if *b == b'|' {
+                        (Some((&data[..2], TK_CONCAT)), 2)
+                    } else {
+                        (Some((&data[..1], TK_BITOR)), 1)
+                    })
+                } else {
+                    Ok((Some((&data[..1], TK_BITOR)), 1))
+                }
+            }
+            b',' => Ok((Some((&data[..1], TK_COMMA)), 1)),
+            b'&' => Ok((Some((&data[..1], TK_BITAND)), 1)),
+            b'~' => Ok((Some((&data[..1], TK_BITNOT)), 1)),
+            quote @ b'`' | quote @ b'\'' | quote @ b'"' => literal(data, quote),
+            b'.' => {
+                if let Some(b) = data.get(1) {
+                    if b.is_ascii_digit() {
+                        fractional_part(data, 0)
+                    } else {
+                        Ok((Some((&data[..1], TK_DOT)), 1))
+                    }
+                } else {
+                    Ok((Some((&data[..1], TK_DOT)), 1))
+                }
+            }
+            b'0'..=b'9' => number(data),
+            b'[' => {
+                if let Some(i) = memchr(b']', data) {
+                    // Keep original quotes / '[' ... ']'
+                    Ok((Some((&data[0..i + 1], TK_ID)), i + 1))
+                } else {
+                    Err(Error::UnterminatedBracket(None))
+                }
+            }
+            b'?' => {
+                match data.iter().skip(1).position(|&b| !b.is_ascii_digit()) {
+                    Some(i) => {
+                        // do not include the '?' in the token
+                        Ok((Some((&data[1..=i], TK_VARIABLE)), i + 1))
+                    }
+                    None => Ok((Some((&data[1..], TK_VARIABLE)), data.len())),
+                }
+            }
+            b'$' | b'@' | b'#' | b':' => {
+                match data
+                    .iter()
+                    .skip(1)
+                    .position(|&b| !is_identifier_continue(b))
+                {
+                    Some(0) => Err(Error::BadVariableName(None)),
+                    Some(i) => {
+                        // '$' is included as part of the name
+                        Ok((Some((&data[..=i], TK_VARIABLE)), i + 1))
+                    }
+                    None => {
+                        if data.len() == 1 {
+                            return Err(Error::BadVariableName(None));
+                        }
+                        Ok((Some((data, TK_VARIABLE)), data.len()))
+                    }
+                }
+            }
+            b if is_identifier_start(b) => {
+                if b == b'x' || b == b'X' {
+                    if let Some(&b'\'') = data.get(1) {
+                        blob_literal(data)
+                    } else {
+                        Ok(self.identifierish(data))
+                    }
+                } else {
+                    Ok(self.identifierish(data))
+                }
+            }
+            _ => Err(Error::UnrecognizedToken(None)),
+        };
+    }
+}
+
+fn literal(data: &[u8], quote: u8) -> Result<(Option<Token<'_>>, usize), Error> {
+    debug_assert_eq!(data[0], quote);
+    let tt = if quote == b'\'' { TK_STRING } else { TK_ID };
+    let mut pb = 0;
+    let mut end = None;
+    // data[0] == quote => skip(1)
+    for (i, b) in data.iter().enumerate().skip(1) {
+        if *b == quote {
+            if pb == quote {
+                // escaped quote
+                pb = 0;
+                continue;
+            }
+        } else if pb == quote {
+            end = Some(i);
+            break;
+        }
+        pb = *b;
+    }
+    if end.is_some() || pb == quote {
+        let i = match end {
+            Some(i) => i,
+            _ => data.len(),
+        };
+        // keep original quotes in the token
+        Ok((Some((&data[0..i], tt)), i))
+    } else {
+        Err(Error::UnterminatedLiteral(None))
+    }
+}
+
+fn blob_literal(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
+    debug_assert!(data[0] == b'x' || data[0] == b'X');
+    debug_assert_eq!(data[1], b'\'');
+    return if let Some((i, b)) = data
+        .iter()
+        .enumerate()
+        .skip(2)
+        .find(|&(_, &b)| !b.is_ascii_hexdigit())
+    {
+        if *b != b'\'' || i % 2 != 0 {
+            return Err(Error::MalformedBlobLiteral(None));
+        }
+        Ok((Some((&data[2..i], TK_BLOB)), i + 1))
+    } else {
+        Err(Error::MalformedBlobLiteral(None))
+    };
+}
+
+fn number(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
+    debug_assert!(data[0].is_ascii_digit());
+    if data[0] == b'0' {
+        if let Some(b) = data.get(1) {
+            if *b == b'x' || *b == b'X' {
+                return hex_integer(data);
+            }
+        } else {
+            return Ok((Some((data, TK_INTEGER)), data.len()));
+        }
+    }
+    return if let Some((i, b)) = data
+        .iter()
+        .enumerate()
+        .skip(1)
+        .find(|&(_, &b)| !b.is_ascii_digit())
+    {
+        if *b == b'.' {
+            return fractional_part(data, i);
+        } else if *b == b'e' || *b == b'E' {
+            return exponential_part(data, i);
+        } else if is_identifier_start(*b) {
+            return Err(Error::BadNumber(None));
+        }
+        Ok((Some((&data[..i], TK_INTEGER)), i))
+    } else {
+        Ok((Some((data, TK_INTEGER)), data.len()))
+    };
+}
+
+fn hex_integer(data: &[u8]) -> Result<(Option<Token<'_>>, usize), Error> {
+    debug_assert_eq!(data[0], b'0');
+    debug_assert!(data[1] == b'x' || data[1] == b'X');
+    return if let Some((i, b)) = data
+        .iter()
+        .enumerate()
+        .skip(2)
+        .find(|&(_, &b)| !b.is_ascii_hexdigit())
+    {
+        // Must not be empty (0x is invalid)
+        if i == 2 || is_identifier_start(*b) {
+            return Err(Error::MalformedHexInteger(None));
+        }
+        Ok((Some((&data[..i], TK_INTEGER)), i))
+    } else {
+        // Must not be empty (0x is invalid)
+        if data.len() == 2 {
+            return Err(Error::MalformedHexInteger(None));
+        }
+        Ok((Some((data, TK_INTEGER)), data.len()))
+    };
+}
+
+fn fractional_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize), Error> {
+    debug_assert_eq!(data[i], b'.');
+    return if let Some((i, b)) = data
+        .iter()
+        .enumerate()
+        .skip(i + 1)
+        .find(|&(_, &b)| !b.is_ascii_digit())
+    {
+        if *b == b'e' || *b == b'E' {
+            return exponential_part(data, i);
+        } else if is_identifier_start(*b) {
+            return Err(Error::BadNumber(None));
+        }
+        Ok((Some((&data[..i], TK_FLOAT)), i))
+    } else {
+        Ok((Some((data, TK_FLOAT)), data.len()))
+    };
+}
+
+fn exponential_part(data: &[u8], i: usize) -> Result<(Option<Token<'_>>, usize), Error> {
+    debug_assert!(data[i] == b'e' || data[i] == b'E');
+    // data[i] == 'e'|'E'
+    return if let Some(b) = data.get(i + 1) {
+        let i = if *b == b'+' || *b == b'-' { i + 1 } else { i };
+        if let Some((i, b)) = data
+            .iter()
+            .enumerate()
+            .skip(i + 1)
+            .find(|&(_, &b)| !b.is_ascii_digit())
+        {
+            if is_identifier_start(*b) {
+                return Err(Error::BadNumber(None));
+            }
+            Ok((Some((&data[..i], TK_FLOAT)), i))
+        } else {
+            if data.len() == i + 1 {
+                return Err(Error::BadNumber(None));
+            }
+            Ok((Some((data, TK_FLOAT)), data.len()))
+        }
+    } else {
+        Err(Error::BadNumber(None))
+    };
+}
+
+impl Tokenizer {
+    fn identifierish<'input>(&mut self, data: &'input [u8]) -> (Option<Token<'input>>, usize) {
+        debug_assert!(is_identifier_start(data[0]));
+        // data[0] is_identifier_start => skip(1)
+        let end = data
+            .iter()
+            .skip(1)
+            .position(|&b| !is_identifier_continue(b));
+        let i = match end {
+            Some(i) => i + 1,
+            _ => data.len(),
+        };
+        let word = &data[..i];
+        let tt = if word.len() >= 2 && word.len() <= MAX_KEYWORD_LEN && word.is_ascii() {
+            keyword_token(word).unwrap_or(TK_ID)
+        } else {
+            TK_ID
+        };
+        (Some((word, tt)), i)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Tokenizer;
+    use crate::dialect::TokenType;
+    use crate::lexer::Scanner;
+
+    #[test]
+    fn fallible_iterator() {
+        let tokenizer = Tokenizer::new();
+        let input = "PRAGMA parser_trace=ON;".as_bytes();
+        let mut s = Scanner::new(tokenizer);
+        let (token1, token_type1) = s.scan(input).unwrap().1.unwrap();
+        assert!(b"PRAGMA".eq_ignore_ascii_case(token1));
+        assert_eq!(TokenType::TK_PRAGMA, token_type1);
+        let (token2, token_type2) = s.scan(input).unwrap().1.unwrap();
+        assert_eq!("parser_trace".as_bytes(), token2);
+        assert_eq!(TokenType::TK_ID, token_type2);
+    }
+}
diff --git a/vendored/sqlite3-parser/src/lexer/sql/test.rs b/vendored/sqlite3-parser/src/lexer/sql/test.rs
new file mode 100644
index 0000000000..7119bff365
--- /dev/null
+++ b/vendored/sqlite3-parser/src/lexer/sql/test.rs
@@ -0,0 +1,123 @@
+use
fallible_iterator::FallibleIterator; + +use super::{Error, Parser}; +use crate::parser::{ + ast::{Cmd, Name, ParameterInfo, QualifiedName, Stmt, ToTokens}, + ParserError, +}; + +#[test] +fn count_placeholders() -> Result<(), Error> { + let sql = "SELECT ? WHERE 1 = ?"; + let mut parser = Parser::new(sql.as_bytes()); + let ast = parser.next()?.unwrap(); + let mut info = ParameterInfo::default(); + ast.to_tokens(&mut info).unwrap(); + assert_eq!(info.count, 2); + Ok(()) +} + +#[test] +fn count_numbered_placeholders() -> Result<(), Error> { + let sql = "SELECT ?1 WHERE 1 = ?2 AND 0 = ?1"; + let mut parser = Parser::new(sql.as_bytes()); + let ast = parser.next()?.unwrap(); + let mut info = ParameterInfo::default(); + ast.to_tokens(&mut info).unwrap(); + assert_eq!(info.count, 2); + Ok(()) +} + +#[test] +fn count_unused_placeholders() -> Result<(), Error> { + let sql = "SELECT ?1 WHERE 1 = ?3"; + let mut parser = Parser::new(sql.as_bytes()); + let ast = parser.next()?.unwrap(); + let mut info = ParameterInfo::default(); + ast.to_tokens(&mut info).unwrap(); + assert_eq!(info.count, 3); + Ok(()) +} + +#[test] +fn count_named_placeholders() -> Result<(), Error> { + let sql = "SELECT :x, :y WHERE 1 = :y"; + let mut parser = Parser::new(sql.as_bytes()); + let ast = parser.next()?.unwrap(); + let mut info = ParameterInfo::default(); + ast.to_tokens(&mut info).unwrap(); + assert_eq!(info.count, 2); + assert_eq!(info.names.len(), 2); + assert!(info.names.contains(":x")); + assert!(info.names.contains(":y")); + Ok(()) +} + +#[test] +fn duplicate_column() { + let sql = "CREATE TABLE t (x TEXT, x TEXT)"; + let mut parser = Parser::new(sql.as_bytes()); + let r = parser.next(); + let Error::ParserError(ParserError::Custom(msg), _) = r.unwrap_err() else { + panic!("unexpected error type") + }; + assert!(msg.contains("duplicate column name")); +} + +#[test] +fn vtab_args() -> Result<(), Error> { + let sql = r#"CREATE VIRTUAL TABLE mail USING fts3( + subject VARCHAR(256) NOT NULL, + body TEXT CHECK(length(body)<10240) +);"#; + let mut parser = Parser::new(sql.as_bytes()); + let Cmd::Stmt(Stmt::CreateVirtualTable { + tbl_name: QualifiedName { + name: Name(tbl_name), + .. + }, + module_name: Name(module_name), + args: Some(args), + .. + }) = parser.next()?.unwrap() + else { + panic!("unexpected AST") + }; + assert_eq!(tbl_name, "mail"); + assert_eq!(module_name, "fts3"); + assert_eq!(args.len(), 2); + assert_eq!(args[0], "subject VARCHAR(256) NOT NULL"); + assert_eq!(args[1], "body TEXT CHECK(length(body)<10240)"); + Ok(()) +} + +#[test] +fn only_semicolons_no_statements() { + let sqls = ["", ";", ";;;"]; + for sql in sqls.iter() { + let mut parser = Parser::new(sql.as_bytes()); + assert_eq!(parser.next().unwrap(), None); + } +} + +#[test] +fn extra_semicolons_between_statements() { + let sqls = [ + "SELECT 1; SELECT 2", + "SELECT 1; SELECT 2;", + "; SELECT 1; SELECT 2", + ";; SELECT 1;; SELECT 2;;", + ]; + for sql in sqls.iter() { + let mut parser = Parser::new(sql.as_bytes()); + assert!(matches!( + parser.next().unwrap(), + Some(Cmd::Stmt(Stmt::Select { .. })) + )); + assert!(matches!( + parser.next().unwrap(), + Some(Cmd::Stmt(Stmt::Select { .. })) + )); + assert_eq!(parser.next().unwrap(), None); + } +} diff --git a/vendored/sqlite3-parser/src/lib.rs b/vendored/sqlite3-parser/src/lib.rs new file mode 100644 index 0000000000..63c7f6fe2d --- /dev/null +++ b/vendored/sqlite3-parser/src/lib.rs @@ -0,0 +1,5 @@ +pub mod dialect; +// In Lemon, the tokenizer calls the parser. 
+pub mod lexer;
+mod parser;
+pub use parser::ast;
diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs
new file mode 100644
index 0000000000..0151c936a7
--- /dev/null
+++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs
@@ -0,0 +1,3248 @@
+//! Abstract Syntax Tree
+
+use std::fmt::{self, Display, Formatter, Write};
+use std::num::ParseIntError;
+use std::str::FromStr;
+
+use indexmap::IndexSet;
+
+use crate::dialect::TokenType::{self, *};
+use crate::dialect::{from_token, is_identifier, Token};
+use crate::parser::{parse::YYCODETYPE, ParserError};
+
+struct FmtTokenStream<'a, 'b> {
+    f: &'a mut Formatter<'b>,
+    spaced: bool,
+}
+impl<'a, 'b> TokenStream for FmtTokenStream<'a, 'b> {
+    type Error = fmt::Error;
+
+    fn append(&mut self, ty: TokenType, value: Option<&str>) -> fmt::Result {
+        if !self.spaced {
+            match ty {
+                TK_COMMA | TK_SEMI | TK_RP | TK_DOT => {}
+                _ => {
+                    self.f.write_char(' ')?;
+                    self.spaced = true;
+                }
+            };
+        }
+        if ty == TK_BLOB {
+            self.f.write_char('X')?;
+            self.f.write_char('\'')?;
+            if let Some(str) = value {
+                self.f.write_str(str)?;
+            }
+            return self.f.write_char('\'');
+        } else if let Some(str) = ty.as_str() {
+            self.f.write_str(str)?;
+            self.spaced = ty == TK_LP || ty == TK_DOT; // str should not be whitespace
+        }
+        if let Some(str) = value {
+            // trick for pretty-print
+            self.spaced = str.bytes().all(|b| b.is_ascii_whitespace());
+            /*if !self.spaced {
+                self.f.write_char(' ')?;
+            }*/
+            self.f.write_str(str)
+        } else {
+            Ok(())
+        }
+    }
+}
+
+#[derive(Default)]
+pub struct ParameterInfo {
+    pub count: u32,
+    pub names: IndexSet<String>,
+}
+
+// https://sqlite.org/lang_expr.html#parameters
+impl TokenStream for ParameterInfo {
+    type Error = ParseIntError;
+
+    fn append(&mut self, ty: TokenType, value: Option<&str>) -> Result<(), Self::Error> {
+        if ty == TK_VARIABLE {
+            if let Some(variable) = value {
+                if variable == "?" {
+                    self.count = self.count.saturating_add(1);
+                } else if variable.as_bytes()[0] == b'?' {
+                    let n = u32::from_str(&variable[1..])?;
+                    if n > self.count {
+                        self.count = n;
+                    }
+                } else if self.names.insert(variable.to_owned()) {
+                    self.count = self.count.saturating_add(1);
+                }
+            }
+        }
+        Ok(())
+    }
+}
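+
+// Example (a sketch): a parsed statement can be replayed into a
+// `ParameterInfo` to count and collect its placeholders, as the tests in
+// `lexer/sql/test.rs` do:
+//
+//     let mut info = ParameterInfo::default();
+//     cmd.to_tokens(&mut info)?;
+//     // e.g. for "SELECT ? WHERE 1 = ?", info.count == 2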
+
+pub trait TokenStream {
+    type Error;
+
+    fn append(&mut self, ty: TokenType, value: Option<&str>) -> Result<(), Self::Error>;
+}
+
+pub trait ToTokens {
+    fn to_tokens<S: TokenStream>(&self, s: &mut S) -> Result<(), S::Error>;
+
+    fn to_fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        let mut s = FmtTokenStream { f, spaced: true };
+        self.to_tokens(&mut s)
+    }
+}
+
+impl<T: ToTokens> ToTokens for &T {
+    fn to_tokens<S: TokenStream>(&self, s: &mut S) -> Result<(), S::Error> {
+        ToTokens::to_tokens(&**self, s)
+    }
+}
+
+impl ToTokens for String {
+    fn to_tokens<S: TokenStream>(&self, s: &mut S) -> Result<(), S::Error> {
+        s.append(TK_ANY, Some(self.as_ref()))
+    }
+}
+
+/* FIXME: does not work, find why
+impl Display for dyn ToTokens {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        let mut s = FmtTokenStream { f, spaced: true };
+        match self.to_tokens(&mut s) {
+            Err(_) => Err(fmt::Error),
+            Ok(()) => Ok(()),
+        }
+    }
+}
+*/
+
+// https://sqlite.org/syntax/sql-stmt.html
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum Cmd {
+    Explain(Stmt),
+    ExplainQueryPlan(Stmt),
+    Stmt(Stmt),
+}
+
+impl ToTokens for Cmd {
+    fn to_tokens<S: TokenStream>(&self, s: &mut S) -> Result<(), S::Error> {
+        match self {
+            Cmd::Explain(stmt) => {
+                s.append(TK_EXPLAIN, None)?;
+                stmt.to_tokens(s)?;
+            }
+            Cmd::ExplainQueryPlan(stmt) => {
+                s.append(TK_EXPLAIN, None)?;
+                s.append(TK_QUERY, None)?;
+                s.append(TK_PLAN, None)?;
+                stmt.to_tokens(s)?;
+            }
+            Cmd::Stmt(stmt) => {
+                stmt.to_tokens(s)?;
+            }
+        }
+        s.append(TK_SEMI, None)
+    }
+}
+
+impl Display for Cmd {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        self.to_fmt(f)
+    }
+}
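+
+// Because `Cmd` implements `Display` through `to_fmt`/`FmtTokenStream`, a
+// parsed statement can be rendered back to SQL text (a sketch; the exact
+// spacing is decided by `FmtTokenStream::append`):
+//
+//     let cmd = Parser::new(b"select 1").next()?.unwrap();
+//     assert_eq!(cmd.to_string(), "SELECT 1;");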
+
+pub(crate) enum ExplainKind {
+    Explain,
+    QueryPlan,
+}
+
+// https://sqlite.org/syntax/sql-stmt.html
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum Stmt {
+    // table name, body
+    AlterTable(QualifiedName, AlterTableBody),
+    // object name
+    Analyze(Option<QualifiedName>),
+    Attach {
+        // TODO distinction between ATTACH and ATTACH DATABASE
+        expr: Expr,
+        db_name: Expr,
+        key: Option<Expr>,
+    },
+    // tx type, tx name
+    Begin(Option<TransactionType>, Option<Name>),
+    // tx name
+    Commit(Option<Name>), // TODO distinction between COMMIT and END
+    CreateIndex {
+        unique: bool,
+        if_not_exists: bool,
+        idx_name: QualifiedName,
+        tbl_name: Name,
+        columns: Vec<SortedColumn>,
+        where_clause: Option<Expr>,
+    },
+    CreateTable {
+        temporary: bool, // TODO distinction between TEMP and TEMPORARY
+        if_not_exists: bool,
+        tbl_name: QualifiedName,
+        body: CreateTableBody,
+    },
+    CreateTrigger {
+        temporary: bool,
+        if_not_exists: bool,
+        trigger_name: QualifiedName,
+        time: Option<TriggerTime>,
+        event: TriggerEvent,
+        tbl_name: QualifiedName,
+        for_each_row: bool,
+        when_clause: Option<Expr>,
+        commands: Vec<TriggerCmd>,
+    },
+    CreateView {
+        temporary: bool,
+        if_not_exists: bool,
+        view_name: QualifiedName,
+        columns: Option<Vec<IndexedColumn>>,
+        select: Select,
+    },
+    CreateVirtualTable {
+        if_not_exists: bool,
+        tbl_name: QualifiedName,
+        module_name: Name,
+        args: Option<Vec<String>>, // TODO smol str
+    },
+    Delete {
+        with: Option<With>,
+        tbl_name: QualifiedName,
+        indexed: Option<Indexed>,
+        where_clause: Option<Expr>,
+        returning: Option<Vec<ResultColumn>>,
+        order_by: Option<Vec<SortedColumn>>,
+        limit: Option<Limit>,
+    },
+    // db name
+    Detach(Expr), // TODO distinction between DETACH and DETACH DATABASE
+    DropIndex {
+        if_exists: bool,
+        idx_name: QualifiedName,
+    },
+    DropTable {
+        if_exists: bool,
+        tbl_name: QualifiedName,
+    },
+    DropTrigger {
+        if_exists: bool,
+        trigger_name: QualifiedName,
+    },
+    DropView {
+        if_exists: bool,
+        view_name: QualifiedName,
+    },
+    Insert {
+        with: Option<With>,
+        or_conflict: Option<ResolveType>, // TODO distinction between REPLACE and INSERT OR REPLACE
+        tbl_name: QualifiedName,
+        columns: Option<Vec<Name>>,
+        body: InsertBody,
+        returning: Option<Vec<ResultColumn>>,
+    },
+    // pragma name, body
+    Pragma(QualifiedName, Option<PragmaBody>),
+    Reindex {
+        obj_name: Option<QualifiedName>,
+    },
+    // savepoint name
+    Release(Name), // TODO distinction between RELEASE and RELEASE SAVEPOINT
+    Rollback {
+        tx_name: Option<Name>,
+        savepoint_name: Option<Name>, // TODO distinction between TO and TO SAVEPOINT
+    },
+    // savepoint name
+    Savepoint(Name),
+    Select(Select),
+    Update {
+        with: Option<With>,
+        or_conflict: Option<ResolveType>,
+        tbl_name: QualifiedName,
+        indexed: Option<Indexed>,
+        sets: Vec<Set>,
+        from: Option<FromClause>,
+        where_clause: Option<Expr>,
+        returning: Option<Vec<ResultColumn>>,
+        order_by: Option<Vec<SortedColumn>>,
+        limit: Option<Limit>,
+    },
+    // database name, into expr
+    Vacuum(Option<Name>, Option<Expr>),
+}
+
+impl ToTokens for Stmt {
+    fn to_tokens<S: TokenStream>(&self, s: &mut S) -> Result<(), S::Error> {
+        match self {
+            Stmt::AlterTable(tbl_name, body) => {
+                s.append(TK_ALTER, None)?;
+                s.append(TK_TABLE, None)?;
+                tbl_name.to_tokens(s)?;
+                body.to_tokens(s)
+            }
+            Stmt::Analyze(obj_name) => {
+                s.append(TK_ANALYZE, None)?;
+                if let Some(obj_name) = obj_name {
+                    obj_name.to_tokens(s)?;
+                }
+                Ok(())
+            }
+            Stmt::Attach { expr, db_name, key } => {
+                s.append(TK_ATTACH, None)?;
+                expr.to_tokens(s)?;
+                s.append(TK_AS, None)?;
+                db_name.to_tokens(s)?;
+                if let Some(key) = key {
+                    s.append(TK_KEY, None)?;
+                    key.to_tokens(s)?;
+                }
+                Ok(())
+            }
+            Stmt::Begin(tx_type, tx_name) => {
+                s.append(TK_BEGIN, None)?;
+                if let Some(tx_type) = tx_type {
+                    tx_type.to_tokens(s)?;
+                }
+                if let Some(tx_name) = tx_name {
+                    s.append(TK_TRANSACTION, None)?;
+                    tx_name.to_tokens(s)?;
+                }
+                Ok(())
+            }
+            Stmt::Commit(tx_name) => {
+                s.append(TK_COMMIT, None)?;
+                if let Some(tx_name) = tx_name {
+                    s.append(TK_TRANSACTION, None)?;
+                    tx_name.to_tokens(s)?;
+                }
+                Ok(())
+            }
+            Stmt::CreateIndex {
+                unique,
+                if_not_exists,
+                idx_name,
+                tbl_name,
+                columns,
+                where_clause,
+            } => {
+                s.append(TK_CREATE, None)?;
+                if *unique {
+                    s.append(TK_UNIQUE, None)?;
+                }
+                s.append(TK_INDEX, None)?;
+                if *if_not_exists {
+                    s.append(TK_IF, None)?;
+                    s.append(TK_NOT, None)?;
+                    s.append(TK_EXISTS, None)?;
+                }
+                idx_name.to_tokens(s)?;
+                s.append(TK_ON, None)?;
+                tbl_name.to_tokens(s)?;
+                s.append(TK_LP, None)?;
+                comma(columns, s)?;
+                s.append(TK_RP, None)?;
+                if let Some(where_clause) = where_clause {
+                    s.append(TK_WHERE, None)?;
+                    where_clause.to_tokens(s)?;
+                }
+                Ok(())
+            }
+            Stmt::CreateTable {
+                temporary,
+                if_not_exists,
+                tbl_name,
+                body,
+            } => {
+                s.append(TK_CREATE, None)?;
+                if *temporary {
+                    s.append(TK_TEMP, None)?;
+                }
+                s.append(TK_TABLE, None)?;
+                if *if_not_exists {
+                    s.append(TK_IF, None)?;
+                    s.append(TK_NOT, None)?;
+                    s.append(TK_EXISTS, None)?;
+                }
+                tbl_name.to_tokens(s)?;
+                body.to_tokens(s)
+            }
+            Stmt::CreateTrigger {
+                temporary,
+                if_not_exists,
+                trigger_name,
+                time,
+                event,
+                tbl_name,
+                for_each_row,
+                when_clause,
+                commands,
+            } => {
+                s.append(TK_CREATE, None)?;
+                if *temporary {
+                    s.append(TK_TEMP, None)?;
+                }
+                s.append(TK_TRIGGER, None)?;
+                if *if_not_exists {
+                    s.append(TK_IF, None)?;
+                    s.append(TK_NOT, None)?;
+                    s.append(TK_EXISTS, None)?;
+                }
+                trigger_name.to_tokens(s)?;
+                if let Some(time) = time {
+                    time.to_tokens(s)?;
+                }
+                event.to_tokens(s)?;
+                s.append(TK_ON, None)?;
+                tbl_name.to_tokens(s)?;
+                if *for_each_row {
+                    s.append(TK_FOR, None)?;
+                    s.append(TK_EACH, None)?;
+                    s.append(TK_ROW, None)?;
+                }
+                if let Some(when_clause) = when_clause {
+
s.append(TK_WHEN, None)?; + when_clause.to_tokens(s)?; + } + s.append(TK_BEGIN, Some("\n"))?; + for command in commands { + command.to_tokens(s)?; + s.append(TK_SEMI, Some("\n"))?; + } + s.append(TK_END, None) + } + Stmt::CreateView { + temporary, + if_not_exists, + view_name, + columns, + select, + } => { + s.append(TK_CREATE, None)?; + if *temporary { + s.append(TK_TEMP, None)?; + } + s.append(TK_VIEW, None)?; + if *if_not_exists { + s.append(TK_IF, None)?; + s.append(TK_NOT, None)?; + s.append(TK_EXISTS, None)?; + } + view_name.to_tokens(s)?; + if let Some(columns) = columns { + s.append(TK_LP, None)?; + comma(columns, s)?; + s.append(TK_RP, None)?; + } + s.append(TK_AS, None)?; + select.to_tokens(s) + } + Stmt::CreateVirtualTable { + if_not_exists, + tbl_name, + module_name, + args, + } => { + s.append(TK_CREATE, None)?; + s.append(TK_VIRTUAL, None)?; + s.append(TK_TABLE, None)?; + if *if_not_exists { + s.append(TK_IF, None)?; + s.append(TK_NOT, None)?; + s.append(TK_EXISTS, None)?; + } + tbl_name.to_tokens(s)?; + s.append(TK_USING, None)?; + module_name.to_tokens(s)?; + s.append(TK_LP, None)?; + if let Some(args) = args { + comma(args, s)?; + } + s.append(TK_RP, None) + } + Stmt::Delete { + with, + tbl_name, + indexed, + where_clause, + returning, + order_by, + limit, + } => { + if let Some(with) = with { + with.to_tokens(s)?; + } + s.append(TK_DELETE, None)?; + s.append(TK_FROM, None)?; + tbl_name.to_tokens(s)?; + if let Some(indexed) = indexed { + indexed.to_tokens(s)?; + } + if let Some(where_clause) = where_clause { + s.append(TK_WHERE, None)?; + where_clause.to_tokens(s)?; + } + if let Some(returning) = returning { + s.append(TK_RETURNING, None)?; + comma(returning, s)?; + } + if let Some(order_by) = order_by { + s.append(TK_ORDER, None)?; + s.append(TK_BY, None)?; + comma(order_by, s)?; + } + if let Some(limit) = limit { + limit.to_tokens(s)?; + } + Ok(()) + } + Stmt::Detach(expr) => { + s.append(TK_DETACH, None)?; + expr.to_tokens(s) + } + Stmt::DropIndex { + if_exists, + idx_name, + } => { + s.append(TK_DROP, None)?; + s.append(TK_INDEX, None)?; + if *if_exists { + s.append(TK_IF, None)?; + s.append(TK_EXISTS, None)?; + } + idx_name.to_tokens(s) + } + Stmt::DropTable { + if_exists, + tbl_name, + } => { + s.append(TK_DROP, None)?; + s.append(TK_TABLE, None)?; + if *if_exists { + s.append(TK_IF, None)?; + s.append(TK_EXISTS, None)?; + } + tbl_name.to_tokens(s) + } + Stmt::DropTrigger { + if_exists, + trigger_name, + } => { + s.append(TK_DROP, None)?; + s.append(TK_TRIGGER, None)?; + if *if_exists { + s.append(TK_IF, None)?; + s.append(TK_EXISTS, None)?; + } + trigger_name.to_tokens(s) + } + Stmt::DropView { + if_exists, + view_name, + } => { + s.append(TK_DROP, None)?; + s.append(TK_VIEW, None)?; + if *if_exists { + s.append(TK_IF, None)?; + s.append(TK_EXISTS, None)?; + } + view_name.to_tokens(s) + } + Stmt::Insert { + with, + or_conflict, + tbl_name, + columns, + body, + returning, + } => { + if let Some(with) = with { + with.to_tokens(s)?; + } + if let Some(ResolveType::Replace) = or_conflict { + s.append(TK_REPLACE, None)?; + } else { + s.append(TK_INSERT, None)?; + if let Some(or_conflict) = or_conflict { + s.append(TK_OR, None)?; + or_conflict.to_tokens(s)?; + } + } + s.append(TK_INTO, None)?; + tbl_name.to_tokens(s)?; + if let Some(columns) = columns { + s.append(TK_LP, None)?; + comma(columns, s)?; + s.append(TK_RP, None)?; + } + body.to_tokens(s)?; + if let Some(returning) = returning { + s.append(TK_RETURNING, None)?; + comma(returning, s)?; + } + Ok(()) + } + 
Stmt::Pragma(name, value) => {
+                s.append(TK_PRAGMA, None)?;
+                name.to_tokens(s)?;
+                if let Some(value) = value {
+                    value.to_tokens(s)?;
+                }
+                Ok(())
+            }
+            Stmt::Reindex { obj_name } => {
+                s.append(TK_REINDEX, None)?;
+                if let Some(obj_name) = obj_name {
+                    obj_name.to_tokens(s)?;
+                }
+                Ok(())
+            }
+            Stmt::Release(name) => {
+                s.append(TK_RELEASE, None)?;
+                name.to_tokens(s)
+            }
+            Stmt::Rollback {
+                tx_name,
+                savepoint_name,
+            } => {
+                s.append(TK_ROLLBACK, None)?;
+                if let Some(tx_name) = tx_name {
+                    s.append(TK_TRANSACTION, None)?;
+                    tx_name.to_tokens(s)?;
+                }
+                if let Some(savepoint_name) = savepoint_name {
+                    s.append(TK_TO, None)?;
+                    savepoint_name.to_tokens(s)?;
+                }
+                Ok(())
+            }
+            Stmt::Savepoint(name) => {
+                s.append(TK_SAVEPOINT, None)?;
+                name.to_tokens(s)
+            }
+            Stmt::Select(select) => select.to_tokens(s),
+            Stmt::Update {
+                with,
+                or_conflict,
+                tbl_name,
+                indexed,
+                sets,
+                from,
+                where_clause,
+                returning,
+                order_by,
+                limit,
+            } => {
+                if let Some(with) = with {
+                    with.to_tokens(s)?;
+                }
+                s.append(TK_UPDATE, None)?;
+                if let Some(or_conflict) = or_conflict {
+                    s.append(TK_OR, None)?;
+                    or_conflict.to_tokens(s)?;
+                }
+                tbl_name.to_tokens(s)?;
+                if let Some(indexed) = indexed {
+                    indexed.to_tokens(s)?;
+                }
+                s.append(TK_SET, None)?;
+                comma(sets, s)?;
+                if let Some(from) = from {
+                    s.append(TK_FROM, None)?;
+                    from.to_tokens(s)?;
+                }
+                if let Some(where_clause) = where_clause {
+                    s.append(TK_WHERE, None)?;
+                    where_clause.to_tokens(s)?;
+                }
+                if let Some(returning) = returning {
+                    s.append(TK_RETURNING, None)?;
+                    comma(returning, s)?;
+                }
+                if let Some(order_by) = order_by {
+                    s.append(TK_ORDER, None)?;
+                    s.append(TK_BY, None)?;
+                    comma(order_by, s)?;
+                }
+                if let Some(limit) = limit {
+                    limit.to_tokens(s)?;
+                }
+                Ok(())
+            }
+            Stmt::Vacuum(name, expr) => {
+                s.append(TK_VACUUM, None)?;
+                if let Some(ref name) = name {
+                    name.to_tokens(s)?;
+                }
+                if let Some(ref expr) = expr {
+                    s.append(TK_INTO, None)?;
+                    expr.to_tokens(s)?;
+                }
+                Ok(())
+            }
+        }
+    }
+}
+
+// https://sqlite.org/syntax/expr.html
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum Expr {
+    Between {
+        lhs: Box<Expr>,
+        not: bool,
+        start: Box<Expr>,
+        end: Box<Expr>,
+    },
+    Binary(Box<Expr>, Operator, Box<Expr>),
+    // CASE expression
+    Case {
+        base: Option<Box<Expr>>,
+        when_then_pairs: Vec<(Expr, Expr)>,
+        else_expr: Option<Box<Expr>>,
+    },
+    // CAST expression
+    Cast {
+        expr: Box<Expr>,
+        type_name: Type,
+    },
+    // COLLATE expression
+    Collate(Box<Expr>, String),
+    // schema-name.table-name.column-name
+    DoublyQualified(Name, Name, Name),
+    // EXISTS subquery
+    Exists(Box<Select>),
+    // call to a built-in function
+    FunctionCall {
+        name: Id,
+        distinctness: Option<Distinctness>,
+        args: Option<Vec<Expr>>,
+        filter_over: Option<FunctionTail>,
+    },
+    // Function call expression with '*' as arg
+    FunctionCallStar {
+        name: Id,
+        filter_over: Option<FunctionTail>,
+    },
+    // Identifier
+    Id(Id),
+    InList {
+        lhs: Box<Expr>,
+        not: bool,
+        rhs: Option<Vec<Expr>>,
+    },
+    InSelect {
+        lhs: Box<Expr>,
+        not: bool,
+        rhs: Box<Select>,
+    },
+    InTable {
+        lhs: Box<Expr>,
+        not: bool,
+        rhs: QualifiedName,
+        args: Option<Vec<Expr>>,
+    },
+    IsNull(Box<Expr>),
+    Like {
+        lhs: Box<Expr>,
+        not: bool,
+        op: LikeOperator,
+        rhs: Box<Expr>,
+        escape: Option<Box<Expr>>,
+    },
+    // Literal expression
+    Literal(Literal),
+    Name(Name),
+    // "NOT NULL" or "NOTNULL"
+    NotNull(Box<Expr>),
+    // Parenthesized subexpression
+    Parenthesized(Vec<Expr>),
+    Qualified(Name, Name),
+    // RAISE function call
+    Raise(ResolveType, Option<String>),
+    // Subquery expression
+    Subquery(Box