Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop support of buf_redux / streaming input #26

Merged
merged 4 commits into from
Mar 15, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,14 @@ YYNOERRORRECOVERY = []
YYSTACKDYNAMIC = []
YYCOVERAGE = []
NDEBUG = []
default = ["YYNOERRORRECOVERY", "buf_redux"]
default = ["YYNOERRORRECOVERY"]

[dependencies]
phf = { version = "0.11", features = ["uncased"] }
log = "0.4"
memchr = "2.0"
fallible-iterator = "0.2"
smallvec = ">=1.6.1"
buf_redux = { version = "0.8", optional = true }
bitflags = "1.3"
uncased = "0.9"
indexmap = "1.9"
Expand Down
8 changes: 3 additions & 5 deletions examples/sql_check.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
use fallible_iterator::FallibleIterator;
use std::env;
use std::fs::File;
use std::fs::read;
use std::panic;

use sqlite3_parser::lexer::sql::Parser;
use sqlite3_parser::lexer::InputStream;

/// Parse specified files and check all commands.
fn main() {
Expand All @@ -13,9 +12,8 @@ fn main() {
for arg in args.skip(1) {
println!("{arg}");
let result = panic::catch_unwind(|| {
let f = File::open(arg.clone()).unwrap();
let input = InputStream::new(f);
let mut parser = Parser::new(input);
let input = read(arg.clone()).unwrap();
let mut parser = Parser::new(&input);
loop {
match parser.next() {
Ok(None) => break,
Expand Down
8 changes: 3 additions & 5 deletions examples/sql_cmds.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
use fallible_iterator::FallibleIterator;
use std::env;
use std::fs::File;
use std::fs::read;
use std::panic;

use sqlite3_parser::lexer::sql::Parser;
use sqlite3_parser::lexer::InputStream;

/// Parse specified files and print all commands.
fn main() {
Expand All @@ -13,9 +12,8 @@ fn main() {
for arg in args.skip(1) {
println!("{arg}");
let result = panic::catch_unwind(|| {
let f = File::open(arg.clone()).unwrap();
let input = InputStream::new(f);
let mut parser = Parser::new(input);
let input = read(arg.clone()).unwrap();
let mut parser = Parser::new(input.as_ref());
loop {
match parser.next() {
Ok(None) => break,
Expand Down
9 changes: 4 additions & 5 deletions examples/sql_tokens.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use sqlite3_parser::lexer::sql::{TokenType, Tokenizer};
use sqlite3_parser::lexer::{InputStream, Scanner};
use sqlite3_parser::lexer::Scanner;

use std::env;
use std::fs::File;
use std::fs::read;
use std::i64;
use std::str;

Expand All @@ -11,10 +11,9 @@ fn main() {
use TokenType::*;
let args = env::args();
for arg in args.skip(1) {
let f = File::open(arg.clone()).unwrap();
let input = InputStream::new(f);
let input = read(arg.clone()).unwrap();
let tokenizer = Tokenizer::new();
let mut s = Scanner::new(input, tokenizer);
let mut s = Scanner::new(&input, tokenizer);
loop {
match s.scan() {
Ok(None) => break,
Expand Down
4 changes: 1 addition & 3 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,4 @@
mod scan;
pub mod sql;

#[cfg(feature = "buf_redux")]
pub use scan::InputStream;
pub use scan::{Input, ScanError, Scanner, Splitter};
pub use scan::{ScanError, Scanner, Splitter};
208 changes: 16 additions & 192 deletions src/lexer/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,170 +6,6 @@ use std::error::Error;
use std::fmt;
use std::io;

#[cfg(feature = "buf_redux")]
use buf_redux::Buffer;
#[cfg(feature = "buf_redux")]
const MAX_CAPACITY: usize = 1024 * 1024 * 1024;

/// Source of bytes for the scanner: a buffer of unprocessed data that can be
/// refilled and consumed from the front.
pub trait Input: fmt::Debug {
/// Try to make more data available in the buffer.
/// The in-memory implementations have nothing to read and just return `Ok(())`.
fn fill_buf(&mut self) -> io::Result<()>; // -> io::Result<&[u8]>;
/// Report whether no more data can be obtained
/// (always `true` for the in-memory implementations).
fn eof(&self) -> bool; //&mut self -> io::Result<bool>
/// Discard the first `amount` bytes of the buffer.
fn consume(&mut self, amount: usize); // -> &[u8]
/// The unprocessed bytes currently held.
fn buffer(&self) -> &[u8];
/// `true` when no unprocessed bytes are currently held.
fn is_empty(&self) -> bool;
/// Number of unprocessed bytes currently held.
fn len(&self) -> usize;
}

/// In-memory input backed by a borrowed byte slice.
///
/// Everything is already in memory, so refilling is a no-op and the input
/// always reports end of file.
impl Input for &[u8] {
    #[inline]
    fn fill_buf(&mut self) -> io::Result<()> {
        // Nothing to read: the whole input is the slice itself.
        Ok(())
    }

    #[inline]
    fn eof(&self) -> bool {
        true
    }

    #[inline]
    fn consume(&mut self, amt: usize) {
        // Re-point the slice past the consumed prefix (panics if `amt > len`).
        let remaining = &self[amt..];
        *self = remaining;
    }

    #[inline]
    fn buffer(&self) -> &[u8] {
        *self
    }

    #[inline]
    fn is_empty(&self) -> bool {
        // Name the slice method explicitly so this does not resolve to the
        // trait method being defined here.
        <[u8]>::is_empty(self)
    }

    #[inline]
    fn len(&self) -> usize {
        // Same as above: call the slice's own `len`.
        <[u8]>::len(self)
    }
}

/// In-memory input backed by an owned byte vector.
///
/// Everything is already in memory, so refilling is a no-op and the input
/// always reports end of file.
impl Input for Vec<u8> {
    #[inline]
    fn fill_buf(&mut self) -> io::Result<()> {
        // Nothing to read: the whole input is already in the vector.
        Ok(())
    }

    #[inline]
    fn eof(&self) -> bool {
        true
    }

    #[inline]
    fn consume(&mut self, amt: usize) {
        // Remove the consumed prefix; the remaining bytes shift to the front.
        drop(self.drain(..amt));
    }

    #[inline]
    fn buffer(&self) -> &[u8] {
        self.as_slice()
    }

    #[inline]
    fn is_empty(&self) -> bool {
        // Inherent `Vec` method, not the trait method being defined here.
        Vec::is_empty(self)
    }

    #[inline]
    fn len(&self) -> usize {
        // Inherent `Vec` method, not the trait method being defined here.
        Vec::len(self)
    }
}

/// Streaming input: wraps a reader and buffers the bytes read from it.
///
/// Only compiled when the `buf_redux` feature is enabled.
#[cfg(feature = "buf_redux")]
pub struct InputStream<R> {
/// The reader provided by the client.
inner: R,
/// Buffer used as argument to split.
buf: Buffer,
/// Set after each refill: `true` when the last read from `inner` returned zero bytes.
eof: bool,
}

#[cfg(feature = "buf_redux")]
impl<R: io::Read> InputStream<R> {
    /// Create a streaming input over `inner` with an initial buffer capacity
    /// of 4096 bytes.
    pub fn new(inner: R) -> Self {
        const INITIAL_CAPACITY: usize = 4096;
        Self::with_capacity(inner, INITIAL_CAPACITY)
    }

    /// Create a streaming input over `inner` whose buffer is allocated with
    /// `Buffer::with_capacity_ringbuf(capacity)`. End of file is not yet reached.
    fn with_capacity(inner: R, capacity: usize) -> Self {
        Self {
            inner,
            buf: Buffer::with_capacity_ringbuf(capacity),
            eof: false,
        }
    }
}

#[cfg(feature = "buf_redux")]
impl<R: io::Read> Input for InputStream<R> {
    /// Read more bytes from the underlying reader into the buffer.
    ///
    /// When the buffer has no free space its capacity is grown; if doubling
    /// the capacity would reach `MAX_CAPACITY`, an `UnexpectedEof` error is
    /// returned instead. After the read, `eof` records whether zero bytes
    /// were obtained.
    fn fill_buf(&mut self) -> io::Result<()> {
        debug!(target: "scanner", "fill_buf: {}", self.buf.capacity());
        if self.buf.free_space() == 0 {
            // The buffer is full: grow it, unless that would exceed the limit.
            let doubled = self.buf.capacity() * 2;
            if doubled >= MAX_CAPACITY {
                return Err(io::Error::from(io::ErrorKind::UnexpectedEof)); // FIXME
            }
            self.buf.make_room();
            self.buf.reserve(doubled);
        } else if self.buf.usable_space() == 0 {
            // There is free space, but none of it is usable yet.
            self.buf.make_room();
        }
        // Now there is room to read some input.
        let read = self.buf.read_from(&mut self.inner)?;
        self.eof = read == 0;
        Ok(())
    }

    /// Whether the most recent `fill_buf` read zero bytes.
    #[inline]
    fn eof(&self) -> bool {
        self.eof
    }

    /// Discard `amt` bytes from the front of the buffer.
    #[inline]
    fn consume(&mut self, amt: usize) {
        self.buf.consume(amt);
    }

    /// The bytes currently buffered.
    #[inline]
    fn buffer(&self) -> &[u8] {
        self.buf.buf()
    }

    /// Whether the buffer currently holds no bytes.
    #[inline]
    fn is_empty(&self) -> bool {
        self.buf.is_empty()
    }

    /// Number of bytes currently buffered.
    #[inline]
    fn len(&self) -> usize {
        self.buf.len()
    }
}

#[cfg(feature = "buf_redux")]
impl<R> fmt::Debug for InputStream<R> {
    /// Format the buffer and the end-of-file flag; the reader itself is left
    /// out, so `R` does not need to implement `Debug`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut repr = f.debug_struct("InputStream");
        repr.field("input", &self.buf);
        repr.field("eof", &self.eof);
        repr.finish()
    }
}

/// Error type produced while scanning.
///
/// It must be constructible from an `io::Error`.
pub trait ScanError: Error + From<io::Error> + Sized {
/// Record the `line` and `column` associated with this error.
/// The scanner calls this with its current position before returning the error.
fn position(&mut self, line: u64, column: usize);
}
Expand All @@ -186,19 +22,15 @@ pub trait Splitter: Sized {
type TokenType;

/// The arguments are an initial substring of the remaining unprocessed
/// data and a flag, `eof`, that reports whether the Reader has no more data
/// to give.
/// data.
///
/// If `split` returns an error, scanning stops and the error
/// is returned to the client.
///
/// The function is never called with an empty data slice unless at EOF.
/// If `eof` is true, however, data may be non-empty and,
/// as always, holds unprocessed text.
/// The function is never called with an empty data slice.
fn split<'input>(
&mut self,
data: &'input [u8],
eof: bool,
) -> SplitResult<'input, Self::TokenType, Self::Error>;
}

Expand All @@ -209,9 +41,9 @@ pub trait Splitter: Sized {
/// Scanning stops unrecoverably at EOF, the first I/O error, or a token too
/// large to fit in the buffer. When a scan stops, the reader may have
/// advanced arbitrarily far past the last token.
pub struct Scanner<I: Input, S: Splitter> {
pub struct Scanner<'input, S: Splitter> {
/// The reader provided by the client.
input: I,
input: &'input [u8],
/// The function to tokenize the input.
splitter: S,
/// current line number
Expand All @@ -220,8 +52,8 @@ pub struct Scanner<I: Input, S: Splitter> {
column: usize,
}

impl<I: Input, S: Splitter> Scanner<I, S> {
pub fn new(input: I, splitter: S) -> Scanner<I, S> {
impl<'input, S: Splitter> Scanner<'input, S> {
pub fn new(input: &'input [u8], splitter: S) -> Scanner<'input, S> {
Scanner {
input,
splitter,
Expand All @@ -245,7 +77,7 @@ impl<I: Input, S: Splitter> Scanner<I, S> {
}

/// Reset the scanner such that it behaves as if it had never been used.
pub fn reset(&mut self, input: I) {
pub fn reset(&mut self, input: &'input [u8]) {
self.input = input;
self.line = 1;
self.column = 1;
Expand All @@ -254,22 +86,19 @@ impl<I: Input, S: Splitter> Scanner<I, S> {

type ScanResult<'input, TokenType, Error> = Result<Option<(&'input [u8], TokenType)>, Error>;

impl<I: Input, S: Splitter> Scanner<I, S> {
impl<'input, S: Splitter> Scanner<'input, S> {
/// Advance the Scanner to next token.
/// Return the token as a byte slice.
/// Return `None` when the end of the input is reached.
/// Return any error that occurs while reading the input.
pub fn scan(&mut self) -> ScanResult<'_, S::TokenType, S::Error> {
use std::mem;
debug!(target: "scanner", "scan(line: {}, column: {})", self.line, self.column);
// Loop until we have a token.
loop {
let eof = self.input.eof();
// See if we can get a token with what we already have.
if !self.input.is_empty() || eof {
// TODO: I don't know how to make the borrow checker happy!
let data = unsafe { mem::transmute(self.input.buffer()) };
match self.splitter.split(data, eof) {
if !self.input.is_empty() {
let data = self.input;
match self.splitter.split(data) {
Err(mut e) => {
e.position(self.line, self.column);
return Err(e);
Expand All @@ -289,33 +118,28 @@ impl<I: Input, S: Splitter> Scanner<I, S> {
}
}
// We cannot generate a token with what we are holding.
// If we've already hit EOF, we are done.
if eof {
// Shut it down.
return Ok(None);
}
// Must read more data.
self.input.fill_buf()?;
// we are done.
return Ok(None);
}
}

/// Consume `amt` bytes of the buffer.
fn consume(&mut self, amt: usize) {
debug!(target: "scanner", "consume({})", amt);
debug_assert!(amt <= self.input.len());
for byte in &self.input.buffer()[..amt] {
for byte in &self.input[..amt] {
if *byte == b'\n' {
self.line += 1;
self.column = 1;
} else {
self.column += 1;
}
}
self.input.consume(amt);
self.input = &self.input[amt..];
}
}

impl<I: Input, S: Splitter> fmt::Debug for Scanner<I, S> {
impl<'input, S: Splitter> fmt::Debug for Scanner<'input, S> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Scanner")
.field("input", &self.input)
Expand Down
Loading