Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes parsing of macro invocations in Lists and SExps and some cases … #654

Merged
merged 2 commits into from
Oct 6, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 149 additions & 22 deletions src/lazy/text/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use crate::lazy::text::matched::{
};
use crate::lazy::text::parse_result::{InvalidInputError, IonParseError};
use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult};
use crate::lazy::text::raw::r#struct::LazyRawTextField_1_0;
use crate::lazy::text::raw::r#struct::{LazyRawTextField_1_0, RawTextStructIterator_1_0};
use crate::lazy::text::raw::sequence::{RawTextListIterator_1_0, RawTextSExpIterator_1_0};
use crate::lazy::text::raw::v1_1::reader::{
EncodedTextMacroInvocation, MacroIdRef, RawTextListIterator_1_1, RawTextMacroInvocation,
Expand Down Expand Up @@ -334,15 +334,31 @@ impl<'data> TextBufferView<'data> {
pub fn match_sexp_value_1_1(
self,
) -> IonParseResult<'data, Option<LazyRawValueExpr<'data, TextEncoding_1_1>>> {
alt((
whitespace_and_then(
Self::match_e_expression
.map(|matched| Some(RawValueExpr::MacroInvocation(matched))),
),
Self::match_sexp_value.map(|maybe_matched| {
whitespace_and_then(alt((
Self::match_e_expression.map(|matched| Some(RawValueExpr::MacroInvocation(matched))),
value(None, tag(")")),
pair(
opt(Self::match_annotations),
// We need the s-expression parser to recognize the input `--3` as the operator `--` and the
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ Because this delegated to match_sexp_value (implied _1_0), it would use the Ion 1.0 matchers for matching anything that is not an E-Expression inside a S-Expression, which caused this to happen:

outer::(     // Handled by match_sexp_1_1
  abc        // Handled by match_value_1_0
  (:foo)     // Handled by match_e_expression
  inner::(   // Handled by match_sexp_1_0
    def      // Handled by match_value_1_0
    (:bar)   // Invalid (for match_sexp_1_0)
  )
)

The solution was to copy/paste the content from match_sexp_value, changing it to use match_value_1_1 instead of match_value_1_0.

// int `3` while recognizing the input `-3` as the int `-3`. If `match_operator` runs before
// `match_value`, it will consume the sign (`-`) of negative number values, treating
// `-3` as an operator (`-`) and an int (`3`). Thus, we run `match_value` first.
whitespace_and_then(alt((Self::match_value_1_1, Self::match_operator))),
)
.map(|(maybe_annotations, mut value)| {
if let Some(annotations) = maybe_annotations {
value.encoded_value = value
.encoded_value
.with_annotations_sequence(annotations.offset(), annotations.len());
// Rewind the value's input to include the annotations sequence.
value.input = self.slice_to_end(annotations.offset() - self.offset());
}
Some(value)
})
.map(|maybe_matched| {
maybe_matched.map(|matched| RawValueExpr::ValueLiteral(matched.into()))
}),
))
)))
.parse(self)
}

Expand Down Expand Up @@ -790,7 +806,7 @@ impl<'data> TextBufferView<'data> {
},
),
map(
match_and_length(Self::match_struct),
match_and_length(Self::match_struct_1_1),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ I added match_struct_1_1, so it got updated here, inside match_value_1_1.

|(matched_struct, length)| {
EncodedTextValue::new(MatchedValue::Struct, matched_struct.offset(), length)
},
Expand Down Expand Up @@ -931,18 +947,22 @@ impl<'data> TextBufferView<'data> {
pub fn match_list_value_1_1(
self,
) -> IonParseResult<'data, Option<LazyRawValueExpr<'data, TextEncoding_1_1>>> {
alt((
whitespace_and_then(
terminated(
Self::match_e_expression,
Self::match_delimiter_after_list_value,
)
.map(|matched| Some(RawValueExpr::MacroInvocation(matched))),
),
Self::match_list_value.map(|maybe_matched| {
whitespace_and_then(alt((
terminated(
Self::match_e_expression,
Self::match_delimiter_after_list_value,
)
.map(|matched| Some(RawValueExpr::MacroInvocation(matched))),
value(None, tag("]")),
terminated(
Self::match_annotated_value_1_1.map(Some),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ Same problem here as in match_sexp_value_1_1. Fixed by copy/pasting the content of match_list_value, and altering this line to point to match_annotated_value_1_1.

// ...followed by a comma or end-of-list
Self::match_delimiter_after_list_value,
)
.map(|maybe_matched| {
maybe_matched.map(|matched| RawValueExpr::ValueLiteral(matched.into()))
}),
))
)))
.parse(self)
}

Expand Down Expand Up @@ -998,7 +1018,7 @@ impl<'data> TextBufferView<'data> {
}
// Scan ahead to find the end of this struct.
let struct_body = self.slice_to_end(1);
let struct_iter = RawTextStructIterator_1_1::new(struct_body);
let struct_iter = RawTextStructIterator_1_0::new(struct_body);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ Because match_ion_struct was using RawTextStructIterator_1_1, it was theoretically possible that Ion 1.0 would accept E-Expressions as long as they were found inside a struct.

let span = match struct_iter.find_span() {
Ok(span) => span,
// If the complete container isn't available, return an incomplete.
Expand All @@ -1021,6 +1041,37 @@ impl<'data> TextBufferView<'data> {
Ok((remaining, matched))
}

pub fn match_struct_1_1(self) -> IonMatchResult<'data> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ This is basically a copy/paste of match_struct, but it uses RawTextStructIterator_1_1.

// If it doesn't start with {, it isn't a struct.
if self.bytes().first() != Some(&b'{') {
let error = InvalidInputError::new(self);
return Err(nom::Err::Error(IonParseError::Invalid(error)));
}
// Scan ahead to find the end of this struct.
let struct_body = self.slice_to_end(1);
let struct_iter = RawTextStructIterator_1_1::new(struct_body);
let span = match struct_iter.find_span() {
Ok(span) => span,
// If the complete container isn't available, return an incomplete.
Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)),
// If invalid syntax was encountered, return a failure to prevent nom from trying
// other parser kinds.
Err(e) => {
return {
let error = InvalidInputError::new(self)
.with_label("matching a v1.1 struct")
.with_description(format!("{}", e));
Err(nom::Err::Failure(IonParseError::Invalid(error)))
}
}
};

// For the matched span, we use `self` again to include the opening `{`
let matched = self.slice(0, span.len());
let remaining = self.slice_to_end(span.len());
Ok((remaining, matched))
}

/// Matches an e-expression invoking a macro.
///
/// If the input does not contain the entire e-expression, returns `IonError::Incomplete(_)`.
Expand Down Expand Up @@ -2276,8 +2327,9 @@ mod tests {
P: Parser<TextBufferView<'data>, O, IonParseError<'data>>,
{
let result = self.try_match(parser);
let (_remaining, match_length) = result
.unwrap_or_else(|_| panic!("Unexpected parse fail for input '{}'", self.input));
let (_remaining, match_length) = result.unwrap_or_else(|e| {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ This is a developer experience improvement. It causes a test failure to print the whole error, not just "Unexpected parse fail for input".

panic!("Unexpected parse fail for input '{}'\n{e}", self.input)
});
// Inputs have a trailing newline and `0` that should _not_ be part of the match
assert_eq!(
match_length,
Expand Down Expand Up @@ -2709,6 +2761,56 @@ mod tests {
mismatch_sexp(input);
}
}
#[test]
fn test_match_sexp_1_1() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ I added some tests for Ion 1.1 SExps and Lists, as well as some test cases for E-Expressions that are nested inside other things. This is far from comprehensive, but it's at least an improvement.

fn match_sexp(input: &str) {
MatchTest::new(input).expect_match(match_length(TextBufferView::match_sexp_1_1));
}
fn mismatch_sexp(input: &str) {
MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_sexp_1_1));
}
let good_inputs = &[
"()",
"(1)",
"(1 2)",
"(a)",
"(a b)",
"(a++)",
"(++a)",
"(a+=b)",
"(())",
"((()))",
"(1 (2 (3 4) 5) 6)",
"(1 (:foo 2 3))",
];
for input in good_inputs {
match_sexp(input);
}

let bad_inputs = &["foo", "1", "(", "(1 2 (3 4 5)"];
for input in bad_inputs {
mismatch_sexp(input);
}
}

#[test]
fn test_match_list_1_1() {
fn match_list(input: &str) {
MatchTest::new(input).expect_match(match_length(TextBufferView::match_list_1_1));
}
fn mismatch_list(input: &str) {
MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_list_1_1));
}
let good_inputs = &["[]", "[1]", "[1, 2]", "[[]]", "[([])]", "[1, (:foo 2 3)]"];
for input in good_inputs {
match_list(input);
}

let bad_inputs = &["foo", "1", "[", "[1, 2, [3, 4]"];
for input in bad_inputs {
mismatch_list(input);
}
}

#[test]
fn test_match_macro_invocation() {
Expand Down Expand Up @@ -2739,6 +2841,31 @@ mod tests {
}
}

use rstest::rstest;
#[rstest]
#[case::simple_e_exp("(:foo)")]
#[case::e_exp_in_e_exp("(:foo (:bar 1))")]
#[case::e_exp_in_list("[a, b, (:foo 1)]")]
#[case::e_exp_in_sexp("(a (:foo 1) c)")]
#[case::e_exp_in_struct("{(:foo)}")]
// #[case::e_exp_in_struct_with_space_before("{ (:foo)}")]
#[case::e_exp_in_struct_with_space_after("{(:foo) }")]
// #[case::e_exp_in_struct_field("{a:(:foo)}")]
// #[case::e_exp_in_struct_field_with_comma("{a:(:foo),}")]
#[case::e_exp_in_struct_field_with_comma_and_second_field("{a:(:foo), b:2}")]
// #[case::e_exp_in_struct_field_with_space_before("{ a:(:foo)}")]
// #[case::e_exp_in_struct_field_with_space_after("{a:(:foo) }")]
#[case::e_exp_in_list_in_struct_field("{ a: [(:foo)] }")]
#[case::e_exp_in_sexp_in_struct_field("{ a: ((:foo)) }")]
#[case::e_exp_in_sexp_in_list("[a, b, ((:foo 1))]")]
#[case::e_exp_in_sexp_in_sexp("(a ((:foo 1)) c)")]
#[case::e_exp_in_list_in_list("[a, b, [(:foo 1)]]")]
#[case::e_exp_in_list_in_sexp("(a [(:foo 1)] c)")]
// TODO: Uncomment the above cases when fixing https://github.com/amazon-ion/ion-rust/issues/653
fn test_match_macro_invocation_in_context(#[case] input: &str) {
MatchTest::new(input).expect_match(match_length(TextBufferView::match_top_level_item_1_1));
}

#[test]
fn test_match_blob() {
fn match_blob(input: &str) {
Expand Down