Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support better error reporting #965

Merged
merged 1 commit into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions grammars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,4 +276,96 @@ mod tests {
};
assert_eq!(expected_expr, actual_expr);
}

#[test]
fn sql_parse_attempts_error() {
fn is_whitespace(string: String) -> bool {
string == "\r\n"
|| (string.len() == 1 && string.chars().next().unwrap().is_whitespace())
}

fn rule_to_message(r: &sql::Rule) -> Option<String> {
match r {
sql::Rule::CreateTable => Some(String::from("Expected table creation.")),
sql::Rule::PrimaryKey => Some(String::from(
"Add primary key consisting of non nullable table columns.",
)),
sql::Rule::CreateUser => Some(String::from("Expected user creation.")),
sql::Rule::SingleQuotedString => {
Some(String::from("Add a string in single qoutes."))
}
sql::Rule::Query => Some(String::from("DML query expected.")),
sql::Rule::Expr => Some(String::from("Expected expression.")),
_ => None,
}
}

type RuleToMessageBoxed = Box<dyn Fn(&sql::Rule) -> Option<String>>;
type IsWhiteSpaceBoxed = Box<dyn Fn(String) -> bool>;

let rule_to_message_boxed: RuleToMessageBoxed = Box::new(rule_to_message);
let is_whitespace_boxed: IsWhiteSpaceBoxed = Box::new(is_whitespace);

let retrieve_parse_attempts_error_string = |input| {
let e = sql::SqlParser::parse(sql::Rule::Command, input).unwrap_err();
let parse_attempt_error = e
.parse_attempts_error(input, &rule_to_message_boxed, &is_whitespace_boxed)
.unwrap();
format!("{parse_attempt_error}")
};

let table_creation_without_primary_key =
r#"create table t(col_1 int,) distributed by (col_1)"#;
assert_eq!(
retrieve_parse_attempts_error_string(table_creation_without_primary_key),
[
" --> 1:26",
" |",
"1 | create table t(col_1 int,) distributed by (col_1)",
" | ^---",
" |",
" = error: parsing error occurred.",
r#" note: expected one of tokens: WHITESPACE, `"`, `-`, `A..Z`, `PRIMARY`, `_`, `a..z`, `А..Я`, `а..я`"#,
" help: Expected table creation.",
" - Add primary key consisting of non nullable table columns.",
]
.join("\n")
);

let user_creation_password_without_single_qoutes = r#"create user
Bob password "wrong""#;
assert_eq!(
retrieve_parse_attempts_error_string(user_creation_password_without_single_qoutes),
[
" --> 2:81",
" |",
r#"2 | Bob password "wrong""#,
" | ^---",
" |",
" = error: parsing error occurred.",
" note: expected one of tokens: WHITESPACE, `''`, `'`",
" help: Expected user creation.",
" - Add a string in single qoutes.",
]
.join("\n")
);

let invalid_expression_in_projection = r#"select 1 + from t"#;
assert_eq!(
retrieve_parse_attempts_error_string(invalid_expression_in_projection),
[
" --> 1:12",
" |",
"1 | select 1 + from t",
" | ^---",
" |",
" = error: parsing error occurred.",
r#" note: expected one of tokens: WHITESPACE, `"`, `$`, `''`, `'`, `(`, `+`, `-`, `0..9`, `?`, `CAST`, `EXISTS`, `FALSE`, `NOT`, `NULL`, `TRUE`"#,
" note: unexpected token: `FROM`",
" help: DML query expected.",
" - Expected expression.",
]
.join("\n")
);
}
}
188 changes: 183 additions & 5 deletions pest/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,15 @@

//! Types for different kinds of parsing failures.

use crate::parser_state::{ParseAttempts, ParsingToken, RulesCallStack};
use alloc::borrow::Cow;
use alloc::borrow::ToOwned;
use alloc::boxed::Box;
use alloc::collections::{BTreeMap, BTreeSet};
use alloc::format;
use alloc::string::String;
use alloc::string::ToString;
use alloc::vec;
use alloc::vec::Vec;
use core::cmp;
use core::fmt;
Expand All @@ -36,6 +40,7 @@ pub struct Error<R> {
path: Option<String>,
line: String,
continued_line: Option<String>,
parse_attempts: Option<ParseAttempts<R>>,
}

/// Different kinds of parsing errors.
Expand Down Expand Up @@ -87,6 +92,74 @@ impl From<Span<'_>> for LineColLocation {
}
}

/// Function mapping rule to its helper message defined by user.
pub type RuleToMessageFn<R> = Box<dyn Fn(&R) -> Option<String>>;
/// Function mapping string element to bool denoting whether it's a whitespace defined by user.
pub type IsWhitespaceFn = Box<dyn Fn(String) -> bool>;

impl ParsingToken {
pub fn is_whitespace(&self, is_whitespace: &IsWhitespaceFn) -> bool {
match self {
ParsingToken::Sensitive { token } => is_whitespace(token.clone()),
ParsingToken::Insensitive { token } => is_whitespace(token.clone()),
ParsingToken::Range { .. } => false,
ParsingToken::BuiltInRule => false,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it seems this variant isn't tested

}
}
}

impl<R: RuleType> ParseAttempts<R> {
/// Helper formatting function to get message informing about tokens we've
/// (un)expected to see.
/// Used as a part of `parse_attempts_error`.
fn tokens_helper_messages(
&self,
is_whitespace_fn: &IsWhitespaceFn,
spacing: &str,
) -> Vec<String> {
let mut helper_messages = Vec::new();
let tokens_header_pairs = vec![
(self.expected_tokens(), "expected"),
(self.unexpected_tokens(), "unexpected"),
];

for (tokens, header) in &tokens_header_pairs {
if tokens.is_empty() {
continue;
}

let mut helper_tokens_message = format!("{spacing}note: {header} ");
helper_tokens_message.push_str(if tokens.len() == 1 {
"token: "
} else {
"one of tokens: "
});

let expected_tokens_set: BTreeSet<String> = tokens
.iter()
.map(|token| {
if token.is_whitespace(is_whitespace_fn) {
String::from("WHITESPACE")
} else {
format!("`{}`", token)
}
})
.collect();

helper_tokens_message.push_str(
&expected_tokens_set
.iter()
.cloned()
.collect::<Vec<String>>()
.join(", "),
);
helper_messages.push(helper_tokens_message);
}

helper_messages
}
}

impl<R: RuleType> Error<R> {
/// Creates `Error` from `ErrorVariant` and `Position`.
///
Expand All @@ -107,7 +180,7 @@ impl<R: RuleType> Error<R> {
/// let error = Error::new_from_pos(
/// ErrorVariant::ParsingError {
/// positives: vec![Rule::open_paren],
/// negatives: vec![Rule::closed_paren]
/// negatives: vec![Rule::closed_paren],
/// },
/// pos
/// );
Expand All @@ -129,9 +202,22 @@ impl<R: RuleType> Error<R> {
line,
continued_line: None,
line_col: LineColLocation::Pos(pos.line_col()),
parse_attempts: None,
}
}

/// Wrapper function to track `parse_attempts` as a result
/// of `state` function call in `parser_state.rs`.
pub(crate) fn new_from_pos_with_parsing_attempts(
variant: ErrorVariant<R>,
pos: Position<'_>,
parse_attempts: ParseAttempts<R>,
) -> Error<R> {
let mut error = Self::new_from_pos(variant, pos);
error.parse_attempts = Some(parse_attempts);
error
}

/// Creates `Error` from `ErrorVariant` and `Span`.
///
/// # Examples
Expand All @@ -153,7 +239,7 @@ impl<R: RuleType> Error<R> {
/// let error = Error::new_from_span(
/// ErrorVariant::ParsingError {
/// positives: vec![Rule::open_paren],
/// negatives: vec![Rule::closed_paren]
/// negatives: vec![Rule::closed_paren],
/// },
/// span
/// );
Expand Down Expand Up @@ -195,6 +281,7 @@ impl<R: RuleType> Error<R> {
line: start_line,
continued_line,
line_col: LineColLocation::Span(span.start_pos().line_col(), end_line_col),
parse_attempts: None,
}
}

Expand All @@ -217,7 +304,7 @@ impl<R: RuleType> Error<R> {
/// Error::new_from_pos(
/// ErrorVariant::ParsingError {
/// positives: vec![Rule::open_paren],
/// negatives: vec![Rule::closed_paren]
/// negatives: vec![Rule::closed_paren],
/// },
/// pos
/// ).with_path("file.rs");
Expand Down Expand Up @@ -247,7 +334,7 @@ impl<R: RuleType> Error<R> {
/// # let error = Error::new_from_pos(
/// # ErrorVariant::ParsingError {
/// # positives: vec![Rule::open_paren],
/// # negatives: vec![Rule::closed_paren]
/// # negatives: vec![Rule::closed_paren],
/// # },
/// # pos);
/// let error = error.with_path("file.rs");
Expand Down Expand Up @@ -287,7 +374,7 @@ impl<R: RuleType> Error<R> {
/// Error::new_from_pos(
/// ErrorVariant::ParsingError {
/// positives: vec![Rule::open_paren],
/// negatives: vec![Rule::closed_paren]
/// negatives: vec![Rule::closed_paren],
/// },
/// pos
/// ).renamed_rules(|rule| {
Expand Down Expand Up @@ -317,6 +404,97 @@ impl<R: RuleType> Error<R> {
self
}

/// Get detailed information about errored rules sequence.
/// Returns `Some(results)` only for `ParsingError`.
pub fn parse_attempts(&self) -> Option<ParseAttempts<R>> {
self.parse_attempts.clone()
}
Comment on lines +409 to +411
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this isn't tested, but it's a trivial method (but maybe it could be used in some assertion)


/// Get error message based on parsing attempts.
/// Returns `None` in case self `parse_attempts` is `None`.
pub fn parse_attempts_error(
&self,
input: &str,
rule_to_message: &RuleToMessageFn<R>,
is_whitespace: &IsWhitespaceFn,
) -> Option<Error<R>> {
let attempts = if let Some(ref parse_attempts) = self.parse_attempts {
parse_attempts.clone()
} else {
return None;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this case isn't tested, but it's not an important one

};

let spacing = self.spacing() + " ";
let error_position = attempts.max_position;
let message = {
let mut help_lines: Vec<String> = Vec::new();
help_lines.push(String::from("error: parsing error occurred."));

// Note: at least one of `(un)expected_tokens` must not be empty.
for tokens_helper_message in attempts.tokens_helper_messages(is_whitespace, &spacing) {
help_lines.push(tokens_helper_message);
}

let call_stacks = attempts.call_stacks();
// Group call stacks by their parents so that we can print common header and
// several sub helper messages.
let mut call_stacks_parents_groups: BTreeMap<Option<R>, Vec<RulesCallStack<R>>> =
BTreeMap::new();
for call_stack in call_stacks {
call_stacks_parents_groups
.entry(call_stack.parent)
.or_default()
.push(call_stack);
}

for (group_parent, group) in call_stacks_parents_groups {
if let Some(parent_rule) = group_parent {
let mut contains_meaningful_info = false;
help_lines.push(format!(
"{spacing}help: {}",
if let Some(message) = rule_to_message(&parent_rule) {
contains_meaningful_info = true;
message
} else {
String::from("[Unknown parent rule]")
Copy link
Contributor

@tomtau tomtau Mar 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

untested (but likely unimportant)

}
));
for call_stack in group {
if let Some(r) = call_stack.deepest.get_rule() {
if let Some(message) = rule_to_message(r) {
contains_meaningful_info = true;
help_lines.push(format!("{spacing} - {message}"));
}
}
}
if !contains_meaningful_info {
// Have to remove useless line for unknown parent rule.
help_lines.pop();
Copy link
Contributor

@tomtau tomtau Mar 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

untested (but likely unimportant)

}
} else {
for call_stack in group {
// Note that `deepest` rule may be `None`. E.g. in case it corresponds
// to WHITESPACE expected token which has no parent rule (on the top level
// parsing).
if let Some(r) = call_stack.deepest.get_rule() {
let helper_message = rule_to_message(r);
if let Some(helper_message) = helper_message {
help_lines.push(format!("{spacing}help: {helper_message}"));
}
}
}
Comment on lines +475 to +485
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

untested?

}
}

help_lines.join("\n")
};
let error = Error::new_from_pos(
ErrorVariant::CustomError { message },
Position::new(input, error_position).unwrap(),
);
Some(error)
}

fn start(&self) -> (usize, usize) {
match self.line_col {
LineColLocation::Pos(line_col) => line_col,
Expand Down
Loading
Loading