Skip to content

Commit

Permalink
feat: support additional parsing error information (rule call stacks …
Browse files Browse the repository at this point in the history
…and (un)expected tokens) gathering
  • Loading branch information
EmirVildanov committed Mar 11, 2024
1 parent 30c7094 commit 4c5d291
Show file tree
Hide file tree
Showing 4 changed files with 639 additions and 9 deletions.
90 changes: 90 additions & 0 deletions grammars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,4 +276,94 @@ mod tests {
};
assert_eq!(expected_expr, actual_expr);
}

// Checks the text rendered by `Error::parse_attempts_error` for three SQL inputs
// that fail to parse: a table definition with a trailing comma and no primary key,
// a user creation whose password is double-quoted, and a projection with a dangling `+`.
#[test]
fn sql_parse_attempts_error() {
// Whitespace predicate handed to `parse_attempts_error`: accepts the two-character
// "\r\n" sequence or any single-byte string whose only char is whitespace.
fn is_whitespace(string: String) -> bool {
string == "\r\n"
|| (string.len() == 1 && string.chars().next().unwrap().is_whitespace())
}

// Maps the handful of grammar rules this test cares about to a help message;
// every other rule yields `None`.
fn rule_to_message(r: &sql::Rule) -> Option<String> {
match r {
sql::Rule::CreateTable => Some(String::from("Expected table creation.")),
sql::Rule::PrimaryKey => Some(String::from(
"Add primary key consisting of non nullable table columns.",
)),
sql::Rule::CreateUser => Some(String::from("Expected user creation.")),
sql::Rule::SingleQuotedString => {
Some(String::from("Add a string in single qoutes."))
}
sql::Rule::Query => Some(String::from("DML query expected.")),
sql::Rule::Expr => Some(String::from("Expected expression.")),
_ => None,
}
}

// `parse_attempts_error` takes the callbacks as references to boxed trait objects,
// so the plain functions are boxed once here.
let rule_to_message_boxed: Box<dyn Fn(&sql::Rule) -> Option<String>> =
Box::new(rule_to_message);
let is_whitespace_boxed: Box<dyn Fn(String) -> bool> = Box::new(is_whitespace);

// Parses `input` as a `Command`, requires the parse to fail, and returns the
// `Display` rendering of the error derived from the recorded parse attempts.
let retrieve_parse_attempts_error_string = |input| {
let e = sql::SqlParser::parse(sql::Rule::Command, input).unwrap_err();
let parse_attempt_error = e
.parse_attempts_error(input, &rule_to_message_boxed, &is_whitespace_boxed)
.unwrap();
format!("{parse_attempt_error}")
};

// Case 1: column list ends with a trailing comma and has no primary key.
let table_creation_without_primary_key =
r#"create table t(col_1 int,) distributed by (col_1)"#;
assert_eq!(
retrieve_parse_attempts_error_string(table_creation_without_primary_key),
[
" --> 1:26",
" |",
"1 | create table t(col_1 int,) distributed by (col_1)",
" | ^---",
" |",
" = error: parsing error occurred.",
r#" note: expected one of tokens: WHITESPACE, `"`, `-`, `A..Z`, `PRIMARY`, `_`, `a..z`, `А..Я`, `а..я`"#,
" help: Expected table creation.",
" - Add primary key consisting of non nullable table columns.",
]
.join("\n")
);

// Case 2: the password is double-quoted; the input spans two lines and the
// error is reported on the second one.
let user_creation_password_without_single_qoutes = r#"create user
Bob password "wrong""#;
assert_eq!(
retrieve_parse_attempts_error_string(user_creation_password_without_single_qoutes),
[
" --> 2:81",
" |",
r#"2 | Bob password "wrong""#,
" | ^---",
" |",
" = error: parsing error occurred.",
" note: expected one of tokens: WHITESPACE, `''`, `'`",
" help: Expected user creation.",
" - Add a string in single qoutes.",
]
.join("\n")
);

// Case 3: `+` has no right-hand operand; this is the only case that also
// reports an unexpected token.
let invalid_expression_in_projection = r#"select 1 + from t"#;
assert_eq!(
retrieve_parse_attempts_error_string(invalid_expression_in_projection),
[
" --> 1:12",
" |",
"1 | select 1 + from t",
" | ^---",
" |",
" = error: parsing error occurred.",
r#" note: expected one of tokens: WHITESPACE, `"`, `$`, `''`, `'`, `(`, `+`, `-`, `0..9`, `?`, `CAST`, `EXISTS`, `FALSE`, `NOT`, `NULL`, `TRUE`"#,
" note: unexpected token: `FROM`",
" help: DML query expected.",
" - Expected expression.",
]
.join("\n")
);
}
}
1 change: 1 addition & 0 deletions pest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ serde = { version = "1.0.145", optional = true }
serde_json = { version = "1.0.85", optional = true }
thiserror = { version = "1.0.37", optional = true }
memchr = { version = "2", optional = true }
itertools = "0.10.5"

[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
Expand Down
200 changes: 195 additions & 5 deletions pest/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,19 @@

//! Types for different kinds of parsing failures.
use crate::parser_state::{ParseAttempts, ParsingToken};
use alloc::borrow::Cow;
use alloc::borrow::ToOwned;
use alloc::collections::BTreeSet;
use alloc::format;
use alloc::string::String;
use alloc::string::ToString;
use alloc::vec::Vec;
use core::cmp;
use core::fmt;
use core::mem;
use itertools::Itertools;
use alloc::boxed::Box;

use crate::position::Position;
use crate::span::Span;
Expand All @@ -36,6 +40,7 @@ pub struct Error<R> {
path: Option<String>,
line: String,
continued_line: Option<String>,
parse_attempts: Option<ParseAttempts<R>>,
}

/// Different kinds of parsing errors.
Expand Down Expand Up @@ -87,6 +92,75 @@ impl From<Span<'_>> for LineColLocation {
}
}

/// User-supplied callback mapping a rule to its help message.
///
/// Returning `None` means the user has no message for that rule.
pub type RuleToMessageFn<R> = Box<dyn Fn(&R) -> Option<String>>;
/// User-supplied callback deciding whether a token's text counts as whitespace.
///
/// The token text is passed by value as an owned `String`.
pub type IsWhitespaceFn = Box<dyn Fn(String) -> bool>;

impl ParsingToken {
    /// Reports whether this token is whitespace according to the user-supplied predicate.
    ///
    /// Only literal tokens (`Sensitive` and `Insensitive`) are handed to `is_whitespace`;
    /// `Range` and `BuiltInRule` tokens are never treated as whitespace.
    pub fn is_whitespace(&self, is_whitespace: &IsWhitespaceFn) -> bool {
        match self {
            // The predicate takes an owned `String`, hence the clone of the token text.
            ParsingToken::Sensitive { token } | ParsingToken::Insensitive { token } => {
                is_whitespace(token.clone())
            }
            ParsingToken::Range { .. } | ParsingToken::BuiltInRule => false,
        }
    }
}

impl<R: RuleType> ParseAttempts<R> {
    /// Builds the `note:` line listing the tokens the parser (un)expected to see.
    ///
    /// Used as a part of `parse_attempts_error`.
    ///
    /// * `is_whitespace_fn` - user predicate; every token it accepts is rendered as
    ///   `WHITESPACE` instead of its literal text.
    /// * `expected` - `true` to describe `expected_tokens()`, `false` to describe
    ///   `unexpected_tokens()`.
    /// * `spacing` - prefix put in front of the line.
    ///
    /// Returns `None` when the selected token list is empty. Otherwise the rendered
    /// tokens are de-duplicated and sorted (via a `BTreeSet`) and joined with `", "`.
    fn tokens_message(
        &self,
        is_whitespace_fn: &IsWhitespaceFn,
        expected: bool,
        spacing: &str,
    ) -> Option<String> {
        let (tokens, kind) = if expected {
            (self.expected_tokens(), "expected")
        } else {
            (self.unexpected_tokens(), "unexpected")
        };

        if tokens.is_empty() {
            return None;
        }

        // Several distinct tokens may render to the same text (all whitespace tokens
        // become `WHITESPACE`), so collect into a set to print each rendering once.
        let rendered: BTreeSet<String> = tokens
            .iter()
            .map(|token| {
                if token.is_whitespace(is_whitespace_fn) {
                    String::from("WHITESPACE")
                } else {
                    format!("`{}`", token)
                }
            })
            .collect();

        // Choose singular/plural from what is actually printed, not from the raw
        // token count, so a lone `WHITESPACE` entry is not announced as "one of tokens".
        let header = if rendered.len() == 1 {
            "token: "
        } else {
            "one of tokens: "
        };

        let listing = rendered.into_iter().collect::<Vec<String>>().join(", ");

        Some(format!("{spacing}note: {kind} {header}{listing}"))
    }
}

impl<R: RuleType> Error<R> {
/// Creates `Error` from `ErrorVariant` and `Position`.
///
Expand All @@ -107,7 +181,7 @@ impl<R: RuleType> Error<R> {
/// let error = Error::new_from_pos(
/// ErrorVariant::ParsingError {
/// positives: vec![Rule::open_paren],
/// negatives: vec![Rule::closed_paren]
/// negatives: vec![Rule::closed_paren],
/// },
/// pos
/// );
Expand All @@ -129,9 +203,22 @@ impl<R: RuleType> Error<R> {
line,
continued_line: None,
line_col: LineColLocation::Pos(pos.line_col()),
parse_attempts: None,
}
}

/// Wrapper function to track `parse_attempts` as a result
/// of `state` function call in `parser_state.rs`.
pub(crate) fn new_from_pos_with_parsing_attempts(
variant: ErrorVariant<R>,
pos: Position<'_>,
parse_attempts: ParseAttempts<R>,
) -> Error<R> {
let mut error = Self::new_from_pos(variant, pos);
error.parse_attempts = Some(parse_attempts);
error
}

/// Creates `Error` from `ErrorVariant` and `Span`.
///
/// # Examples
Expand All @@ -153,7 +240,7 @@ impl<R: RuleType> Error<R> {
/// let error = Error::new_from_span(
/// ErrorVariant::ParsingError {
/// positives: vec![Rule::open_paren],
/// negatives: vec![Rule::closed_paren]
/// negatives: vec![Rule::closed_paren],
/// },
/// span
/// );
Expand Down Expand Up @@ -195,6 +282,7 @@ impl<R: RuleType> Error<R> {
line: start_line,
continued_line,
line_col: LineColLocation::Span(span.start_pos().line_col(), end_line_col),
parse_attempts: None,
}
}

Expand All @@ -217,7 +305,7 @@ impl<R: RuleType> Error<R> {
/// Error::new_from_pos(
/// ErrorVariant::ParsingError {
/// positives: vec![Rule::open_paren],
/// negatives: vec![Rule::closed_paren]
/// negatives: vec![Rule::closed_paren],
/// },
/// pos
/// ).with_path("file.rs");
Expand Down Expand Up @@ -247,7 +335,7 @@ impl<R: RuleType> Error<R> {
/// # let error = Error::new_from_pos(
/// # ErrorVariant::ParsingError {
/// # positives: vec![Rule::open_paren],
/// # negatives: vec![Rule::closed_paren]
/// # negatives: vec![Rule::closed_paren],
/// # },
/// # pos);
/// let error = error.with_path("file.rs");
Expand Down Expand Up @@ -287,7 +375,7 @@ impl<R: RuleType> Error<R> {
/// Error::new_from_pos(
/// ErrorVariant::ParsingError {
/// positives: vec![Rule::open_paren],
/// negatives: vec![Rule::closed_paren]
/// negatives: vec![Rule::closed_paren],
/// },
/// pos
/// ).renamed_rules(|rule| {
Expand Down Expand Up @@ -317,6 +405,108 @@ impl<R: RuleType> Error<R> {
self
}

/// Get detailed information about errored rules sequence.
/// Returns `Some(results)` only for `ParsingError`.
///
/// The stored attempts are cloned on every call; the result is `None` when no
/// parse attempts were attached to this error.
pub fn parse_attempts(&self) -> Option<ParseAttempts<R>> {
self.parse_attempts.clone()
}

/// Get error message based on parsing attempts.
/// Returns `None` in case self `parse_attempts` is `None`.
pub fn parse_attempts_error(
&self,
input: &str,
rule_to_message: &RuleToMessageFn<R>,
is_whitespace: &IsWhitespaceFn,
) -> Option<Error<R>> {
let attempts = if let Some(ref parse_attempts) = self.parse_attempts {
parse_attempts.clone()
} else {
return None;
};

let spacing = self.spacing() + " ";
let error_position = attempts.max_position;
let message = {
let mut help_lines: Vec<String> = Vec::new();
help_lines.push(String::from("error: parsing error occurred."));

// Note: at least one of `(un)expected_tokens` must not be empty.
if let Some(m) = attempts.tokens_message(is_whitespace, true, &spacing) {
help_lines.push(m)
};
if let Some(m) = attempts.tokens_message(is_whitespace, false, &spacing) {
help_lines.push(m)
};

let mut call_stacks = attempts.call_stacks();
// Call stacks with `None` parent goes in the end of the vec
// so that we can filter them in case we'll see their `deepest` as a parent.
call_stacks.sort_by(|c_st_first, c_st_second| {
c_st_first.parent.cmp(&c_st_second.parent).reverse()
});

// Group call stacks by their parents so that we can print common header and
// several sub helper messages.
let call_stacks_parents_groups: Vec<Vec<_>> = call_stacks
.into_iter()
.group_by(|call_stack| call_stack.parent)
.into_iter()
.map(|(_, group)| group.collect())
.collect();

for group in call_stacks_parents_groups {
let group_parent = group
.first()
.expect("Each group must contain at least one call stack")
.parent;
if let Some(parent_rule) = group_parent {
let mut contains_meaningful_info = false;
help_lines.push(format!(
"{spacing}help: {}",
if let Some(message) = rule_to_message(&parent_rule) {
contains_meaningful_info = true;
message
} else {
String::from("[Unknown parent rule]")
}
));
for call_stack in group {
if let Some(r) = call_stack.deepest.get_rule() {
if let Some(message) = rule_to_message(r) {
contains_meaningful_info = true;
help_lines.push(format!("{spacing} - {message}"));
}
}
}
if !contains_meaningful_info {
// Have to remove useless line for unknown parent rule.
help_lines.pop();
}
} else {
for call_stack in group {
// Note that `deepest` rule may be `None`. E.g. in case it corresponds
// to WHITESPACE expected token which has no parent rule (on the top level
// parsing).
if let Some(r) = call_stack.deepest.get_rule() {
let helper_message = rule_to_message(r);
if let Some(helper_message) = helper_message {
help_lines.push(format!("{spacing}help: {helper_message}"));
}
}
}
}
}

help_lines.join("\n")
};
let error = Error::new_from_pos(
ErrorVariant::CustomError { message },
Position::new(input, error_position).unwrap(),
);
Some(error)
}

fn start(&self) -> (usize, usize) {
match self.line_col {
LineColLocation::Pos(line_col) => line_col,
Expand Down
Loading

0 comments on commit 4c5d291

Please sign in to comment.