From 0ac414d9cf00b909a2aa5ef9a49259681bf3fe78 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 16 Jul 2024 10:58:36 +0200 Subject: [PATCH 1/4] lempar: use Vec instead of SmallVec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This move creates a huge different in Parser::new, and when I say, huge I say 1444%. SmallVec triggers too many memmoves. ```bash Running benches/sqlparser_bench.rs (target/release/deps/sqlparser_bench-90367034c12b9cef) sqlparser-rs parsing benchmark/sqlparser::select time: [257.62 ns 258.12 ns 258.69 ns] sqlparser-rs parsing benchmark/sqlparser::select time: [3.9573 µs 3.9604 µs 3.9654 µs] change: [+1443.7% +1449.0% +1453.8%] (p = 0.00 < 0.05) Performance has regressed. ``` Signed-off-by: Pere Diaz Bou --- third_party/lemon/lempar.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/third_party/lemon/lempar.rs b/third_party/lemon/lempar.rs index 83cb246..ff855cf 100644 --- a/third_party/lemon/lempar.rs +++ b/third_party/lemon/lempar.rs @@ -174,8 +174,6 @@ pub struct yyStackEntry { ** is the value of the token */ } -use smallvec::SmallVec; - /* The state of the parser is completely contained in an instance of ** the following structure */ #[allow(non_camel_case_types)] @@ -186,7 +184,7 @@ pub struct yyParser<'input> { //#[cfg(not(feature = "YYNOERRORRECOVERY"))] yyerrcnt: i32, /* Shifts left before out of the error */ %% /* A place to hold %extra_context */ - yystack: SmallVec<[yyStackEntry; YYSTACKDEPTH]>, /* The parser's stack */ + yystack: Vec, /* The parser's stack */ } use std::cmp::Ordering; @@ -324,7 +322,7 @@ impl yyParser<'_> { yyidx: 0, #[cfg(feature = "YYTRACKMAXSTACKDEPTH")] yyhwm: 0, - yystack: SmallVec::new(), + yystack: Vec::new(), //#[cfg(not(feature = "YYNOERRORRECOVERY"))] yyerrcnt: -1, %% /* Optional %extra_context store */ From fb5e01b1a63658ee55853b02ea7e3efd6e3992a1 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 16 Jul 2024 11:15:20 +0200 Subject: [PATCH 2/4] remove smallvec dependency Signed-off-by: Pere Diaz Bou --- Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 5a69a4c..b4150a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,6 @@ phf = { version = "0.11", features = ["uncased"] } log = "0.4" memchr = "2.0" fallible-iterator = "0.3" -smallvec = ">=1.6.1" bitflags = "2.0" uncased = "0.9" indexmap = "2.0" From cbc8c14cedf5a480ba1fba745a8954f4668e438d Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 20 Jul 2024 11:52:22 +0200 Subject: [PATCH 3/4] Remove code associated to fixed vs dynamic yystack --- Cargo.toml | 1 - src/parser/mod.rs | 3 -- src/parser/parse.y | 4 --- third_party/lemon/lemon.c | 2 -- third_party/lemon/lempar.rs | 57 ++----------------------------------- 5 files changed, 2 insertions(+), 65 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b4150a7..d00feaf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,6 @@ maintenance = { status = "experimental" } # FIXME: specific to one parser, not global YYTRACKMAXSTACKDEPTH = [] YYNOERRORRECOVERY = [] -YYSTACKDYNAMIC = [] YYCOVERAGE = [] NDEBUG = [] default = ["YYNOERRORRECOVERY"] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 62f345b..05524bf 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -20,8 +20,6 @@ use ast::{Cmd, ExplainKind, Name, Stmt}; /// Parser error #[derive(Debug, PartialEq)] pub enum ParserError { - /// Stack overflow - StackOverflow, /// Syntax error SyntaxError { /// token type @@ -38,7 +36,6 @@ pub enum ParserError { impl std::fmt::Display for ParserError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { - ParserError::StackOverflow => f.write_str("parser overflowed its stack"), ParserError::SyntaxError { token_type, found } => { write!(f, "near {}, \"{:?}\": syntax error", token_type, found) } diff --git a/src/parser/parse.y b/src/parser/parse.y index 15e287e..174a1b7 100644 --- a/src/parser/parse.y +++ b/src/parser/parse.y @@ -48,10 +48,6 @@ }); } } -%stack_overflow { - error!(target: TARGET, "parser stack overflow"); - self.ctx.error = Some(ParserError::StackOverflow); -} // The name of the generated procedure that implements the parser // is as follows: diff --git a/third_party/lemon/lemon.c b/third_party/lemon/lemon.c index aea1d09..8ead377 100644 --- a/third_party/lemon/lemon.c +++ b/third_party/lemon/lemon.c @@ -4520,8 +4520,6 @@ void ReportTable( print_stack_union(out,lemp,&lineno); if( lemp->stacksize ){ fprintf(out,"const YYSTACKDEPTH: usize = %s;\n",lemp->stacksize); lineno++; - }else{ - fprintf(out,"const YYSTACKDEPTH: usize = 128;\n"); lineno++; } if( lemp->errsym && lemp->errsym->useCnt ){ fprintf(out,"const YYERRORSYMBOL: YYCODETYPE = %d;\n",lemp->errsym->index); lineno++; diff --git a/third_party/lemon/lempar.rs b/third_party/lemon/lempar.rs index ff855cf..28695fe 100644 --- a/third_party/lemon/lempar.rs +++ b/third_party/lemon/lempar.rs @@ -263,22 +263,9 @@ static yyRuleName: [&str; YYNRULE] = [ */ impl yyParser<'_> { fn yy_grow_stack_if_needed(&mut self) -> bool { - if self.yyidx >= self.yystack.capacity() { - if self.yyGrowStack() { - self.yyidx = self.yyidx.checked_sub(1).unwrap(); - self.yyStackOverflow(); - return true; - } - } false } fn yy_grow_stack_for_push(&mut self) -> bool { - if self.yyidx >= self.yystack.capacity() - 1 { - if self.yyGrowStack() { - self.yyStackOverflow(); - return true; - } - } // yystack is not prefilled with zero value like in C. if self.yyidx == self.yystack.len() { self.yystack.push(yyStackEntry::default()); @@ -287,29 +274,6 @@ impl yyParser<'_> { } false } - - #[allow(non_snake_case)] - #[cfg(feature = "YYSTACKDYNAMIC")] - fn yyGrowStack(&mut self) -> bool { - let capacity = self.yystack.capacity(); - let additional = capacity + 100; - self.yystack.reserve(additional); - #[cfg(not(feature = "NDEBUG"))] - { - debug!( - target: TARGET, - "Stack grows from {} to {} entries.", - capacity, - self.yystack.capacity() - ); - } - false - } - #[allow(non_snake_case)] - #[cfg(not(feature = "YYSTACKDYNAMIC"))] - fn yyGrowStack(&mut self) -> bool { - true - } } /* Initialize a new parser. @@ -515,26 +479,9 @@ fn yy_find_reduce_action( yy_action[i as usize] } -/* -** The following routine is called if the stack overflows. -*/ -impl yyParser<'_> { - #[allow(non_snake_case)] - fn yyStackOverflow(&mut self) { - #[cfg(not(feature = "NDEBUG"))] - { - error!(target: TARGET, "Stack Overflow!"); - } - while self.yyidx > 0 { - self.yy_pop_parser_stack(); - } - /* Here code is inserted which will execute if the parser - ** stack every overflows */ - /******** Begin %stack_overflow code ******************************************/ + /******** Begin %stack_overflow code ****************************************** %% - /******** End %stack_overflow code ********************************************/ - } -} + ******** End %stack_overflow code ********************************************/ /* ** Print tracing information for a SHIFT action From f83a162c43647c0db57841bbe40a61cc29e9d8ea Mon Sep 17 00:00:00 2001 From: gwenn Date: Sat, 20 Jul 2024 13:05:41 +0200 Subject: [PATCH 4/4] Fix sqlparser_bench --- sqlparser_bench/benches/sqlparser_bench.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs index c0c7ef7..a185566 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -18,10 +18,10 @@ fn basic_queries(c: &mut Criterion) { let mut group = c.benchmark_group("sqlparser-rs parsing benchmark"); let string = b"SELECT * FROM `table` WHERE 1 = 1"; - group.bench_function("sqlparser::select", |b| { + group.bench_with_input("sqlparser::select", &string, |b, &s| { b.iter(|| { - let mut parser = Parser::new(string); - parser.next() + let mut parser = Parser::new(s); + assert!(parser.next().unwrap().unwrap().readonly()) }); }); @@ -36,10 +36,10 @@ fn basic_queries(c: &mut Criterion) { SELECT * FROM `table` LEFT JOIN derived USING (user_id) "; - group.bench_function("sqlparser::with_select", |b| { + group.bench_with_input("sqlparser::with_select", &with_query, |b, &s| { b.iter(|| { - let mut parser = Parser::new(with_query); - parser.next() + let mut parser = Parser::new(s); + assert!(parser.next().unwrap().unwrap().readonly()) }); }); }