diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index f0b58650..84c1cba2 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -39,6 +39,8 @@ jobs: args: --verbose --workspace --all-features --no-fail-fast --codecov --output-path codecov.json - name: Codecov Upload uses: codecov/codecov-action@v3 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: files: codecov.json fail_ci_if_error: true diff --git a/Cargo.toml b/Cargo.toml index 0c3c12b2..f7b33d9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,7 +84,16 @@ rstest_reuse = "0.5.0" walkdir = "2.3" test-generator = "0.3" memmap = "0.7.0" +criterion = "0.5.1" + +[[bench]] +name = "read_many_structs" +harness = false [profile.release] lto = true codegen-units = 1 + +[profile.profiling] +inherits = "release" +debug = true diff --git a/benches/read_many_structs.rs b/benches/read_many_structs.rs new file mode 100644 index 00000000..62cc157b --- /dev/null +++ b/benches/read_many_structs.rs @@ -0,0 +1,143 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use ion_rs::lazy::decoder::LazyDecoder; +use ion_rs::lazy::encoding::TextEncoding_1_1; +use ion_rs::lazy::r#struct::LazyStruct; +use ion_rs::lazy::reader::{LazyApplicationReader, LazyTextReader_1_1}; +use ion_rs::lazy::value::LazyValue; +use ion_rs::lazy::value_ref::ValueRef; +use ion_rs::{Element, Format, IonResult, TextKind}; +use ion_rs::{ElementReader, IonData}; + +fn rewrite_as_compact_text(pretty_ion: &str) -> IonResult { + let values = Element::read_all(pretty_ion).unwrap(); + let mut buffer = Vec::new(); + Element::write_all_as(&values, Format::Text(TextKind::Compact), &mut buffer)?; + Ok(String::from_utf8(buffer).unwrap()) +} + +pub fn criterion_benchmark(c: &mut Criterion) { + const NUM_VALUES: usize = 10_000; + let pretty_data_1_0 = r#"{ + 'timestamp': 1670446800245, + 'threadId': 418, + 'threadName': "scheduler-thread-6", + 'loggerName': "com.example.organization.product.component.ClassName", + 'logLevel': INFO, + 'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}", + 'parameters': ["SUCCESS","example-client-1","aws-us-east-5f-18b4fa","region 4","2022-12-07T20:59:59.744000Z",], + }"#.repeat(NUM_VALUES); + let data_1_0 = rewrite_as_compact_text(&pretty_data_1_0).unwrap(); + let template_text = r#" + (macro event (timestamp thread_id thread_name client_num host_id parameters) + { + 'timestamp': timestamp, + 'threadId': thread_id, + 'threadName': (make_string "scheduler-thread-" thread_name), + 'loggerName': "com.example.organization.product.component.ClassName", + 'logLevel': (quote INFO), + 'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}", + 'parameters': [ + "SUCCESS", + (make_string "example-client-" client_num), + (make_string "aws-us-east-5f-" host_id), + parameters + ] + } + ) + "#; + + let data_1_1 = r#"(:event 1670446800245 418 "6" "1" "18b4fa" (:values "region 4" "2022-12-07T20:59:59.744000Z"))"#.repeat(NUM_VALUES); + + println!("Ion 1.0 data size: {} bytes", data_1_0.len()); + println!("Ion 1.1 data size: {} bytes", data_1_1.len()); + + // As a sanity check, materialize the data from both the Ion 1.0 and 1.1 streams and make sure + // that they are equivalent before we start measuring the time needed to read them. + let seq_1_0 = LazyTextReader_1_1::new(data_1_0.as_bytes()) + .unwrap() + .read_all_elements() + .unwrap(); + let mut reader_1_1 = LazyTextReader_1_1::new(data_1_1.as_bytes()).unwrap(); + reader_1_1.register_template(template_text).unwrap(); + let seq_1_1 = reader_1_1.read_all_elements().unwrap(); + assert!( + IonData::eq(&seq_1_0, &seq_1_1), + "Ion 1.0 sequence was not equal to the Ion 1.1 sequence" + ); + + fn count_value_and_children(lazy_value: &LazyValue<'_, D>) -> IonResult { + use ValueRef::*; + let child_count = match lazy_value.read()? { + List(s) => count_sequence_children(s.iter())?, + SExp(s) => count_sequence_children(s.iter())?, + Struct(s) => count_struct_children(&s)?, + scalar => { + let _ = black_box(scalar); + 0 + } + }; + Ok(1 + child_count) + } + + fn count_sequence_children<'a, D: LazyDecoder>( + lazy_sequence: impl Iterator>>, + ) -> IonResult { + let mut count = 0; + for value in lazy_sequence { + count += count_value_and_children(&value?)?; + } + Ok(count) + } + + fn count_struct_children(lazy_struct: &LazyStruct<'_, D>) -> IonResult { + let mut count = 0; + for field in lazy_struct { + count += count_value_and_children(&field?.value())?; + } + Ok(count) + } + + c.bench_function("text 1.0: scan all", |b| { + b.iter(|| { + let mut reader = + LazyApplicationReader::<'_, TextEncoding_1_1>::new(data_1_0.as_bytes()).unwrap(); + while let Some(item) = reader.next().unwrap() { + black_box(item); + } + }) + }); + c.bench_function("text 1.0: read all", |b| { + b.iter(|| { + let mut reader = + LazyApplicationReader::<'_, TextEncoding_1_1>::new(data_1_0.as_bytes()).unwrap(); + let mut num_values = 0usize; + while let Some(item) = reader.next().unwrap() { + num_values += count_value_and_children(&item).unwrap(); + } + let _ = black_box(num_values); + }) + }); + c.bench_function("text 1.1: scan all", |b| { + b.iter(|| { + let mut reader = LazyTextReader_1_1::new(data_1_1.as_bytes()).unwrap(); + reader.register_template(template_text).unwrap(); + while let Some(item) = reader.next().unwrap() { + black_box(item); + } + }) + }); + c.bench_function("text 1.1: read all", |b| { + b.iter(|| { + let mut reader = LazyTextReader_1_1::new(data_1_1.as_bytes()).unwrap(); + reader.register_template(template_text).unwrap(); + let mut num_values = 0usize; + while let Some(item) = reader.next().unwrap() { + num_values += count_value_and_children(&item).unwrap(); + } + let _ = black_box(num_values); + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/examples/lazy_read_all_values.rs b/examples/lazy_read_all_values.rs index b42efe03..32a794ba 100644 --- a/examples/lazy_read_all_values.rs +++ b/examples/lazy_read_all_values.rs @@ -61,8 +61,8 @@ mod lazy_reader_example { Ok(1 + child_count) } - fn count_sequence_children<'a, 'b>( - lazy_sequence: impl Iterator>>, + fn count_sequence_children<'a>( + lazy_sequence: impl Iterator>>, ) -> IonResult { let mut count = 0; for value in lazy_sequence { diff --git a/src/binary/non_blocking/raw_binary_reader.rs b/src/binary/non_blocking/raw_binary_reader.rs index 9df56483..2ba3beff 100644 --- a/src/binary/non_blocking/raw_binary_reader.rs +++ b/src/binary/non_blocking/raw_binary_reader.rs @@ -299,7 +299,7 @@ impl Container { /// A raw binary reader that pulls input bytes from a fixed buffer. /// /// If any read operation fails due to the buffer containing incomplete data, that method will -/// return [`IonError::Incomplete`](crate::IonError::Incomplete). +/// return [`IonError::Incomplete`]. /// /// If the buffer (generic type `A`) is a [`Vec`], then data can be appended to it between read /// operations. This can be useful when reading from a data source that is growing over time, such diff --git a/src/element/mod.rs b/src/element/mod.rs index c0cae14c..cb7d0f41 100644 --- a/src/element/mod.rs +++ b/src/element/mod.rs @@ -2,7 +2,7 @@ //! Provides a dynamically typed, materialized representation of an Ion value. //! -//! An [Element](Element) represents an `(annotations, value)` pair, where a `value` is +//! An [Element] represents an `(annotations, value)` pair, where a `value` is //! an Ion `integer`, `float`, `list`, `struct`, etc. //! //! For reference here are a couple other _value_ style APIs for JSON: diff --git a/src/ion_reader.rs b/src/ion_reader.rs index 7a426a54..6582bfd8 100644 --- a/src/ion_reader.rs +++ b/src/ion_reader.rs @@ -80,7 +80,7 @@ pub trait IonReader { /// error is encountered while reading, returns [crate::IonError]. fn read_i64(&mut self) -> IonResult; - /// Attempts to read the current item as an Ion integer and return it as an [`Int`](crate::types::Int). If the + /// Attempts to read the current item as an Ion integer and return it as an [`Int`]. If the /// current item is not an integer or an IO error is encountered while reading, returns /// [crate::IonError]. fn read_int(&mut self) -> IonResult; diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index f06c9e43..fae434ce 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -2,6 +2,8 @@ use std::fmt::Debug; +use bumpalo::Bump as BumpAllocator; + use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; use crate::lazy::binary::raw::r#struct::{LazyRawBinaryStruct, RawBinaryStructIterator}; use crate::lazy::binary::raw::reader::LazyRawBinaryReader; @@ -15,9 +17,9 @@ use crate::lazy::decoder::{ LazyRawValueExpr, RawFieldExpr, RawValueExpr, }; use crate::lazy::encoding::{BinaryEncoding_1_0, TextEncoding_1_0, TextEncoding_1_1}; -use crate::lazy::expanded::macro_evaluator::MacroInvocation; +use crate::lazy::expanded::macro_evaluator::RawEExpression; use crate::lazy::never::Never; -use crate::lazy::raw_stream_item::RawStreamItem; +use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::text::raw::r#struct::{LazyRawTextStruct_1_0, RawTextStructIterator_1_0}; use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; @@ -26,8 +28,7 @@ use crate::lazy::text::raw::sequence::{ }; use crate::lazy::text::raw::v1_1::reader::{ LazyRawTextList_1_1, LazyRawTextSExp_1_1, LazyRawTextStruct_1_1, MacroIdRef, - RawTextListIterator_1_1, RawTextMacroInvocation, RawTextSExpIterator_1_1, - RawTextStructIterator_1_1, + RawTextEExpression_1_1, RawTextSequenceCacheIterator_1_1, RawTextStructCacheIterator_1_1, }; use crate::lazy::text::value::{ LazyRawTextValue_1_0, LazyRawTextValue_1_1, RawTextAnnotationsIterator, @@ -35,82 +36,80 @@ use crate::lazy::text::value::{ use crate::{IonResult, IonType, RawSymbolTokenRef}; /// An implementation of the `LazyDecoder` trait that can read any encoding of Ion. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy)] pub struct AnyEncoding; // This family of types avoids boxing and dynamic dispatch by using enums of the supported formats // within each type. Trait methods are implemented by forwarding the call to the appropriate // underlying type. -impl<'data> LazyDecoder<'data> for AnyEncoding { - type Reader = LazyRawAnyReader<'data>; - type Value = LazyRawAnyValue<'data>; - type SExp = LazyRawAnySExp<'data>; - type List = LazyRawAnyList<'data>; - type Struct = LazyRawAnyStruct<'data>; - type AnnotationsIterator = RawAnyAnnotationsIterator<'data>; - type MacroInvocation = LazyRawAnyMacroInvocation<'data>; +impl LazyDecoder for AnyEncoding { + type Reader<'data> = LazyRawAnyReader<'data>; + type Value<'top> = LazyRawAnyValue<'top>; + type SExp<'top> = LazyRawAnySExp<'top>; + type List<'top> = LazyRawAnyList<'top>; + type Struct<'top> = LazyRawAnyStruct<'top>; + type AnnotationsIterator<'top> = RawAnyAnnotationsIterator<'top>; + type EExpression<'top> = LazyRawAnyEExpression<'top>; } - #[derive(Debug, Copy, Clone)] -pub struct LazyRawAnyMacroInvocation<'data> { - encoding: LazyRawAnyMacroInvocationKind<'data>, +pub struct LazyRawAnyEExpression<'top> { + encoding: LazyRawAnyEExpressionKind<'top>, } #[derive(Debug, Copy, Clone)] -enum LazyRawAnyMacroInvocationKind<'data> { +enum LazyRawAnyEExpressionKind<'top> { // Ion 1.0 does not support macro invocations. Having these variants hold an instance of // `Never` (which cannot be instantiated) informs the compiler that it can eliminate these // branches in code paths exclusive to v1.0. Text_1_0(Never), Binary_1_0(Never), - Text_1_1(RawTextMacroInvocation<'data>), + Text_1_1(RawTextEExpression_1_1<'top>), } -impl<'data> From> for LazyRawAnyMacroInvocation<'data> { - fn from(text_invocation: RawTextMacroInvocation<'data>) -> Self { - LazyRawAnyMacroInvocation { - encoding: LazyRawAnyMacroInvocationKind::Text_1_1(text_invocation), +impl<'top> From> for LazyRawAnyEExpression<'top> { + fn from(text_invocation: RawTextEExpression_1_1<'top>) -> Self { + LazyRawAnyEExpression { + encoding: LazyRawAnyEExpressionKind::Text_1_1(text_invocation), } } } -impl<'data> MacroInvocation<'data, AnyEncoding> for LazyRawAnyMacroInvocation<'data> { - type ArgumentExpr = LazyRawValueExpr<'data, AnyEncoding>; - type ArgumentsIterator = LazyRawAnyMacroArgsIterator<'data>; +impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> { + type RawArgumentsIterator<'a> = LazyRawAnyMacroArgsIterator<'top,> where Self: 'a; - fn id(&self) -> MacroIdRef<'_> { + fn id(&self) -> MacroIdRef<'top> { match self.encoding { - LazyRawAnyMacroInvocationKind::Text_1_0(_) => unreachable!("macro in text Ion 1.0"), - LazyRawAnyMacroInvocationKind::Binary_1_0(_) => unreachable!("macro in binary Ion 1.0"), - LazyRawAnyMacroInvocationKind::Text_1_1(ref m) => m.id(), + LazyRawAnyEExpressionKind::Text_1_0(_) => unreachable!("macro in text Ion 1.0"), + LazyRawAnyEExpressionKind::Binary_1_0(_) => unreachable!("macro in binary Ion 1.0"), + LazyRawAnyEExpressionKind::Text_1_1(ref m) => m.id(), } } - fn arguments(&self) -> Self::ArgumentsIterator { + fn raw_arguments(&self) -> Self::RawArgumentsIterator<'_> { match self.encoding { - LazyRawAnyMacroInvocationKind::Text_1_0(_) => unreachable!("macro in text Ion 1.0"), - LazyRawAnyMacroInvocationKind::Binary_1_0(_) => unreachable!("macro in binary Ion 1.0"), - LazyRawAnyMacroInvocationKind::Text_1_1(m) => LazyRawAnyMacroArgsIterator { - encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(m.arguments()), + LazyRawAnyEExpressionKind::Text_1_0(_) => unreachable!("macro in text Ion 1.0"), + LazyRawAnyEExpressionKind::Binary_1_0(_) => unreachable!("macro in binary Ion 1.0"), + LazyRawAnyEExpressionKind::Text_1_1(m) => LazyRawAnyMacroArgsIterator { + encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(m.raw_arguments()), }, } } } -pub enum LazyRawAnyMacroArgsIteratorKind<'data> { +pub enum LazyRawAnyMacroArgsIteratorKind<'top> { Text_1_1( - as MacroInvocation< - 'data, + as RawEExpression< + 'top, TextEncoding_1_1, - >>::ArgumentsIterator, + >>::RawArgumentsIterator<'top>, ), } -pub struct LazyRawAnyMacroArgsIterator<'data> { - encoding: LazyRawAnyMacroArgsIteratorKind<'data>, +pub struct LazyRawAnyMacroArgsIterator<'top> { + encoding: LazyRawAnyMacroArgsIteratorKind<'top>, } -impl<'data> Iterator for LazyRawAnyMacroArgsIterator<'data> { - type Item = IonResult>; +impl<'top> Iterator for LazyRawAnyMacroArgsIterator<'top> { + type Item = IonResult>; fn next(&mut self) -> Option { match self.encoding { @@ -118,11 +117,11 @@ impl<'data> Iterator for LazyRawAnyMacroArgsIterator<'data> { Some(Ok(RawValueExpr::ValueLiteral(value))) => { Some(Ok(RawValueExpr::ValueLiteral(LazyRawAnyValue::from(value)))) } - Some(Ok(RawValueExpr::MacroInvocation(invocation))) => Some(Ok( - RawValueExpr::MacroInvocation(LazyRawAnyMacroInvocation { - encoding: LazyRawAnyMacroInvocationKind::Text_1_1(invocation), - }), - )), + Some(Ok(RawValueExpr::MacroInvocation(invocation))) => { + Some(Ok(RawValueExpr::MacroInvocation(LazyRawAnyEExpression { + encoding: LazyRawAnyEExpressionKind::Text_1_1(invocation), + }))) + } Some(Err(e)) => Some(Err(e)), None => None, }, @@ -167,9 +166,15 @@ impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> { } } - fn next<'a>(&'a mut self) -> IonResult> { + fn next<'top>( + &'top mut self, + allocator: &'top BumpAllocator, + ) -> IonResult> + where + 'data: 'top, + { match &mut self.encoding { - RawReaderKind::Text_1_0(r) => Ok(r.next()?.into()), + RawReaderKind::Text_1_0(r) => Ok(r.next(allocator)?.into()), RawReaderKind::Binary_1_0(r) => Ok(r.next()?.into()), } } @@ -178,50 +183,48 @@ impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> { // ===== Values ====== #[derive(Debug, Copy, Clone)] -pub struct LazyRawAnyValue<'data> { - encoding: LazyRawValueKind<'data>, +pub struct LazyRawAnyValue<'top> { + encoding: LazyRawValueKind<'top>, } #[derive(Debug, Copy, Clone)] -pub enum LazyRawValueKind<'data> { - Text_1_0(LazyRawTextValue_1_0<'data>), - Binary_1_0(LazyRawBinaryValue<'data>), - Text_1_1(LazyRawTextValue_1_1<'data>), +pub enum LazyRawValueKind<'top> { + Text_1_0(LazyRawTextValue_1_0<'top>), + Binary_1_0(LazyRawBinaryValue<'top>), + Text_1_1(LazyRawTextValue_1_1<'top>), } -impl<'data> From> for LazyRawAnyValue<'data> { - fn from(value: LazyRawTextValue_1_0<'data>) -> Self { +impl<'top> From> for LazyRawAnyValue<'top> { + fn from(value: LazyRawTextValue_1_0<'top>) -> Self { LazyRawAnyValue { encoding: LazyRawValueKind::Text_1_0(value), } } } -impl<'data> From> for LazyRawAnyValue<'data> { - fn from(value: LazyRawBinaryValue<'data>) -> Self { +impl<'top> From> for LazyRawAnyValue<'top> { + fn from(value: LazyRawBinaryValue<'top>) -> Self { LazyRawAnyValue { encoding: LazyRawValueKind::Binary_1_0(value), } } } -impl<'data> From> for LazyRawAnyValue<'data> { - fn from(value: LazyRawTextValue_1_1<'data>) -> Self { +impl<'top> From> for LazyRawAnyValue<'top> { + fn from(value: LazyRawTextValue_1_1<'top>) -> Self { LazyRawAnyValue { encoding: LazyRawValueKind::Text_1_1(value), } } } -impl<'data> From> - for LazyRawValueExpr<'data, AnyEncoding> -{ - fn from(value: LazyRawValueExpr<'data, TextEncoding_1_0>) -> Self { +impl<'top> From> for LazyRawValueExpr<'top, AnyEncoding> { + fn from(value: LazyRawValueExpr<'top, TextEncoding_1_0>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), RawValueExpr::MacroInvocation(m) => { - let invocation = LazyRawAnyMacroInvocation { - encoding: LazyRawAnyMacroInvocationKind::Text_1_0(m), + let invocation = LazyRawAnyEExpression { + encoding: LazyRawAnyEExpressionKind::Text_1_0(m), }; RawValueExpr::MacroInvocation(invocation) } @@ -229,15 +232,15 @@ impl<'data> From> } } -impl<'data> From> - for LazyRawValueExpr<'data, AnyEncoding> +impl<'top> From> + for LazyRawValueExpr<'top, AnyEncoding> { - fn from(value: LazyRawValueExpr<'data, BinaryEncoding_1_0>) -> Self { + fn from(value: LazyRawValueExpr<'top, BinaryEncoding_1_0>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), RawValueExpr::MacroInvocation(m) => { - let invocation = LazyRawAnyMacroInvocation { - encoding: LazyRawAnyMacroInvocationKind::Binary_1_0(m), + let invocation = LazyRawAnyEExpression { + encoding: LazyRawAnyEExpressionKind::Binary_1_0(m), }; RawValueExpr::MacroInvocation(invocation) } @@ -245,15 +248,13 @@ impl<'data> From> } } -impl<'data> From> - for LazyRawValueExpr<'data, AnyEncoding> -{ - fn from(value: LazyRawValueExpr<'data, TextEncoding_1_1>) -> Self { +impl<'top> From> for LazyRawValueExpr<'top, AnyEncoding> { + fn from(value: LazyRawValueExpr<'top, TextEncoding_1_1>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), RawValueExpr::MacroInvocation(m) => { - let invocation = LazyRawAnyMacroInvocation { - encoding: LazyRawAnyMacroInvocationKind::Text_1_1(m), + let invocation = LazyRawAnyEExpression { + encoding: LazyRawAnyEExpressionKind::Text_1_1(m), }; RawValueExpr::MacroInvocation(invocation) } @@ -261,8 +262,8 @@ impl<'data> From> } } -impl<'data> From> for RawValueRef<'data, AnyEncoding> { - fn from(value: RawValueRef<'data, TextEncoding_1_0>) -> Self { +impl<'top> From> for RawValueRef<'top, AnyEncoding> { + fn from(value: RawValueRef<'top, TextEncoding_1_0>) -> Self { use RawValueRef::*; match value { Null(ion_type) => Null(ion_type), @@ -282,8 +283,8 @@ impl<'data> From> for RawValueRef<'data, An } } -impl<'data> From> for RawValueRef<'data, AnyEncoding> { - fn from(value: RawValueRef<'data, BinaryEncoding_1_0>) -> Self { +impl<'top> From> for RawValueRef<'top, AnyEncoding> { + fn from(value: RawValueRef<'top, BinaryEncoding_1_0>) -> Self { use RawValueRef::*; match value { Null(ion_type) => Null(ion_type), @@ -303,8 +304,8 @@ impl<'data> From> for RawValueRef<'data, } } -impl<'data> From> for RawValueRef<'data, AnyEncoding> { - fn from(value: RawValueRef<'data, TextEncoding_1_1>) -> Self { +impl<'top> From> for RawValueRef<'top, AnyEncoding> { + fn from(value: RawValueRef<'top, TextEncoding_1_1>) -> Self { use RawValueRef::*; match value { Null(ion_type) => Null(ion_type), @@ -324,55 +325,73 @@ impl<'data> From> for RawValueRef<'data, An } } -impl<'data> From> for RawStreamItem<'data, AnyEncoding> { - fn from(value: RawStreamItem<'data, TextEncoding_1_0>) -> Self { +impl<'top> From> + for LazyRawStreamItem<'top, AnyEncoding> +{ + fn from(value: LazyRawStreamItem<'top, TextEncoding_1_0>) -> Self { match value { - RawStreamItem::VersionMarker(major, minor) => { - RawStreamItem::VersionMarker(major, minor) + LazyRawStreamItem::::VersionMarker(major, minor) => { + LazyRawStreamItem::::VersionMarker(major, minor) } - RawStreamItem::Value(value) => RawStreamItem::Value(value.into()), - RawStreamItem::EExpression(_) => { + LazyRawStreamItem::::Value(value) => { + LazyRawStreamItem::::Value(value.into()) + } + LazyRawStreamItem::::EExpression(_) => { unreachable!("Ion 1.0 does not support macro invocations") } - RawStreamItem::EndOfStream => RawStreamItem::EndOfStream, + LazyRawStreamItem::::EndOfStream => { + LazyRawStreamItem::::EndOfStream + } } } } -impl<'data> From> for RawStreamItem<'data, AnyEncoding> { - fn from(value: RawStreamItem<'data, BinaryEncoding_1_0>) -> Self { +impl<'top> From> + for LazyRawStreamItem<'top, AnyEncoding> +{ + fn from(value: LazyRawStreamItem<'top, BinaryEncoding_1_0>) -> Self { match value { - RawStreamItem::VersionMarker(major, minor) => { - RawStreamItem::VersionMarker(major, minor) + LazyRawStreamItem::::VersionMarker(major, minor) => { + LazyRawStreamItem::::VersionMarker(major, minor) + } + LazyRawStreamItem::::Value(value) => { + LazyRawStreamItem::::Value(value.into()) } - RawStreamItem::Value(value) => RawStreamItem::Value(value.into()), - RawStreamItem::EExpression(_) => { + LazyRawStreamItem::::EExpression(_) => { unreachable!("Ion 1.0 does not support macro invocations") } - RawStreamItem::EndOfStream => RawStreamItem::EndOfStream, + LazyRawStreamItem::::EndOfStream => { + LazyRawStreamItem::::EndOfStream + } } } } -impl<'data> From> for RawStreamItem<'data, AnyEncoding> { - fn from(value: RawStreamItem<'data, TextEncoding_1_1>) -> Self { +impl<'top> From> + for LazyRawStreamItem<'top, AnyEncoding> +{ + fn from(value: LazyRawStreamItem<'top, TextEncoding_1_1>) -> Self { match value { - RawStreamItem::VersionMarker(major, minor) => { - RawStreamItem::VersionMarker(major, minor) + LazyRawStreamItem::::VersionMarker(major, minor) => { + LazyRawStreamItem::::VersionMarker(major, minor) + } + LazyRawStreamItem::::Value(value) => { + LazyRawStreamItem::::Value(value.into()) } - RawStreamItem::Value(value) => RawStreamItem::Value(value.into()), - RawStreamItem::EExpression(invocation) => { - RawStreamItem::EExpression(LazyRawAnyMacroInvocation { - encoding: LazyRawAnyMacroInvocationKind::Text_1_1(invocation), + LazyRawStreamItem::::EExpression(invocation) => { + LazyRawStreamItem::::EExpression(LazyRawAnyEExpression { + encoding: LazyRawAnyEExpressionKind::Text_1_1(invocation), }) } - RawStreamItem::EndOfStream => RawStreamItem::EndOfStream, + LazyRawStreamItem::::EndOfStream => { + LazyRawStreamItem::::EndOfStream + } } } } -impl<'data> LazyRawValuePrivate<'data> for LazyRawAnyValue<'data> { - fn field_name(&self) -> IonResult> { +impl<'top> LazyRawValuePrivate<'top> for LazyRawAnyValue<'top> { + fn field_name(&self) -> IonResult> { match &self.encoding { LazyRawValueKind::Text_1_0(v) => v.field_name(), LazyRawValueKind::Binary_1_0(v) => v.field_name(), @@ -381,7 +400,7 @@ impl<'data> LazyRawValuePrivate<'data> for LazyRawAnyValue<'data> { } } -impl<'data> LazyRawValue<'data, AnyEncoding> for LazyRawAnyValue<'data> { +impl<'top> LazyRawValue<'top, AnyEncoding> for LazyRawAnyValue<'top> { fn ion_type(&self) -> IonType { match &self.encoding { LazyRawValueKind::Text_1_0(v) => v.ion_type(), @@ -398,7 +417,7 @@ impl<'data> LazyRawValue<'data, AnyEncoding> for LazyRawAnyValue<'data> { } } - fn annotations(&self) -> RawAnyAnnotationsIterator<'data> { + fn annotations(&self) -> RawAnyAnnotationsIterator<'top> { match &self.encoding { LazyRawValueKind::Text_1_0(v) => RawAnyAnnotationsIterator { encoding: RawAnnotationsIteratorKind::Text_1_0(v.annotations()), @@ -412,7 +431,7 @@ impl<'data> LazyRawValue<'data, AnyEncoding> for LazyRawAnyValue<'data> { } } - fn read(&self) -> IonResult> { + fn read(&self) -> IonResult> { match &self.encoding { LazyRawValueKind::Text_1_0(v) => Ok(v.read()?.into()), LazyRawValueKind::Binary_1_0(v) => Ok(v.read()?.into()), @@ -423,18 +442,18 @@ impl<'data> LazyRawValue<'data, AnyEncoding> for LazyRawAnyValue<'data> { // ===== Annotations ===== -pub struct RawAnyAnnotationsIterator<'data> { - encoding: RawAnnotationsIteratorKind<'data>, +pub struct RawAnyAnnotationsIterator<'top> { + encoding: RawAnnotationsIteratorKind<'top>, } -pub enum RawAnnotationsIteratorKind<'data> { - Text_1_0(RawTextAnnotationsIterator<'data>), - Binary_1_0(RawBinaryAnnotationsIterator<'data>), - Text_1_1(RawTextAnnotationsIterator<'data>), +pub enum RawAnnotationsIteratorKind<'top> { + Text_1_0(RawTextAnnotationsIterator<'top>), + Binary_1_0(RawBinaryAnnotationsIterator<'top>), + Text_1_1(RawTextAnnotationsIterator<'top>), } -impl<'data> Iterator for RawAnyAnnotationsIterator<'data> { - type Item = IonResult>; +impl<'top> Iterator for RawAnyAnnotationsIterator<'top> { + type Item = IonResult>; fn next(&mut self) -> Option { match &mut self.encoding { @@ -448,19 +467,19 @@ impl<'data> Iterator for RawAnyAnnotationsIterator<'data> { // ===== Lists ====== #[derive(Debug, Copy, Clone)] -pub struct LazyRawAnyList<'data> { - encoding: LazyRawListKind<'data>, +pub struct LazyRawAnyList<'top> { + encoding: LazyRawListKind<'top>, } #[derive(Debug, Copy, Clone)] -pub enum LazyRawListKind<'data> { - Text_1_0(LazyRawTextList_1_0<'data>), - Binary_1_0(LazyRawBinaryList<'data>), - Text_1_1(LazyRawTextList_1_1<'data>), +pub enum LazyRawListKind<'top> { + Text_1_0(LazyRawTextList_1_0<'top>), + Binary_1_0(LazyRawBinaryList<'top>), + Text_1_1(LazyRawTextList_1_1<'top>), } -impl<'data> LazyContainerPrivate<'data, AnyEncoding> for LazyRawAnyList<'data> { - fn from_value(value: LazyRawAnyValue<'data>) -> Self { +impl<'top> LazyContainerPrivate<'top, AnyEncoding> for LazyRawAnyList<'top> { + fn from_value(value: LazyRawAnyValue<'top>) -> Self { match value.encoding { LazyRawValueKind::Text_1_0(v) => LazyRawAnyList { encoding: LazyRawListKind::Text_1_0(LazyRawTextList_1_0::from_value(v)), @@ -482,7 +501,7 @@ pub struct RawAnyListIterator<'data> { pub enum RawAnyListIteratorKind<'data> { Text_1_0(RawTextListIterator_1_0<'data>), Binary_1_0(RawBinarySequenceIterator<'data>), - Text_1_1(RawTextListIterator_1_1<'data>), + Text_1_1(RawTextSequenceCacheIterator_1_1<'data>), } impl<'data> Iterator for RawAnyListIterator<'data> { @@ -503,10 +522,10 @@ impl<'data> Iterator for RawAnyListIterator<'data> { } } -impl<'data> LazyRawSequence<'data, AnyEncoding> for LazyRawAnyList<'data> { - type Iterator = RawAnyListIterator<'data>; +impl<'top> LazyRawSequence<'top, AnyEncoding> for LazyRawAnyList<'top> { + type Iterator = RawAnyListIterator<'top>; - fn annotations(&self) -> >::AnnotationsIterator { + fn annotations(&self) -> ::AnnotationsIterator<'top> { self.as_value().annotations() } @@ -532,7 +551,7 @@ impl<'data> LazyRawSequence<'data, AnyEncoding> for LazyRawAnyList<'data> { } } - fn as_value(&self) -> LazyRawAnyValue<'data> { + fn as_value(&self) -> LazyRawAnyValue<'top> { match &self.encoding { LazyRawListKind::Text_1_0(s) => s.as_value().into(), LazyRawListKind::Binary_1_0(s) => s.as_value().into(), @@ -602,7 +621,7 @@ pub struct RawAnySExpIterator<'data> { pub enum RawAnySExpIteratorKind<'data> { Text_1_0(RawTextSExpIterator_1_0<'data>), Binary_1_0(RawBinarySequenceIterator<'data>), - Text_1_1(RawTextSExpIterator_1_1<'data>), + Text_1_1(RawTextSequenceCacheIterator_1_1<'data>), } impl<'data> Iterator for RawAnySExpIterator<'data> { @@ -623,10 +642,10 @@ impl<'data> Iterator for RawAnySExpIterator<'data> { } } -impl<'data> LazyRawSequence<'data, AnyEncoding> for LazyRawAnySExp<'data> { - type Iterator = RawAnySExpIterator<'data>; +impl<'top> LazyRawSequence<'top, AnyEncoding> for LazyRawAnySExp<'top> { + type Iterator = RawAnySExpIterator<'top>; - fn annotations(&self) -> >::AnnotationsIterator { + fn annotations(&self) -> ::AnnotationsIterator<'top> { self.as_value().annotations() } @@ -652,7 +671,7 @@ impl<'data> LazyRawSequence<'data, AnyEncoding> for LazyRawAnySExp<'data> { } } - fn as_value(&self) -> LazyRawAnyValue<'data> { + fn as_value(&self) -> LazyRawAnyValue<'top> { match &self.encoding { LazyRawSExpKind::Text_1_0(s) => (s.as_value()).into(), LazyRawSExpKind::Binary_1_0(s) => (s.as_value()).into(), @@ -706,7 +725,7 @@ pub struct RawAnyStructIterator<'data> { pub enum RawAnyStructIteratorKind<'data> { Text_1_0(RawTextStructIterator_1_0<'data>), Binary_1_0(RawBinaryStructIterator<'data>), - Text_1_1(RawTextStructIterator_1_1<'data>), + Text_1_1(RawTextStructCacheIterator_1_1<'data>), } impl<'data> Iterator for RawAnyStructIterator<'data> { @@ -791,10 +810,10 @@ impl<'data> LazyContainerPrivate<'data, AnyEncoding> for LazyRawAnyStruct<'data> } } -impl<'data> LazyRawStruct<'data, AnyEncoding> for LazyRawAnyStruct<'data> { - type Iterator = RawAnyStructIterator<'data>; +impl<'top> LazyRawStruct<'top, AnyEncoding> for LazyRawAnyStruct<'top> { + type Iterator = RawAnyStructIterator<'top>; - fn annotations(&self) -> >::AnnotationsIterator { + fn annotations(&self) -> ::AnnotationsIterator<'top> { match &self.encoding { LazyRawStructKind::Text_1_0(s) => RawAnyAnnotationsIterator { encoding: RawAnnotationsIteratorKind::Text_1_0(s.annotations()), @@ -861,41 +880,55 @@ mod tests { use crate::lazy::any_encoding::LazyRawAnyReader; use crate::lazy::binary::test_utilities::to_binary_ion; use crate::lazy::decoder::{LazyRawReader, LazyRawSequence, LazyRawValue}; - use crate::lazy::raw_stream_item::RawStreamItem; + use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::{IonResult, RawSymbolTokenRef, Timestamp}; + use super::*; + #[test] fn any_encoding() -> IonResult<()> { fn test_input(data: &[u8]) -> IonResult<()> { + let allocator = BumpAllocator::new(); + let mut reader = LazyRawAnyReader::new(data); - assert_eq!(reader.next()?.expect_ivm()?, (1, 0)); - let _strukt = reader.next()?.expect_value()?.read()?.expect_struct()?; - let name = reader.next()?.expect_value()?; + assert_eq!(reader.next(&allocator)?.expect_ivm()?, (1, 0)); + let _strukt = reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_struct()?; + let name = reader.next(&allocator)?.expect_value()?; assert_eq!( name.annotations().next().unwrap()?, RawSymbolTokenRef::SymbolId(4) ); assert_eq!(name.read()?.expect_string()?.text(), "Gary"); assert_eq!( - reader.next()?.expect_value()?.read()?, + reader.next(&allocator)?.expect_value()?.read()?, RawValueRef::String("foo".into()) ); assert_eq!( - reader.next()?.expect_value()?.read()?, + reader.next(&allocator)?.expect_value()?.read()?, RawValueRef::Int(5.into()) ); assert_eq!( - reader.next()?.expect_value()?.read()?, + reader.next(&allocator)?.expect_value()?.read()?, RawValueRef::Timestamp(Timestamp::with_year(2023).with_month(8).build()?) ); assert_eq!( - reader.next()?.expect_value()?.read()?, + reader.next(&allocator)?.expect_value()?.read()?, RawValueRef::Bool(false) ); let mut sum = 0; - for lazy_value_result in reader.next()?.expect_value()?.read()?.expect_list()?.iter() { + for lazy_value_result in reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_list()? + .iter() + { sum += lazy_value_result?.expect_value()?.read()?.expect_i64()?; } assert_eq!(sum, 6); @@ -903,7 +936,10 @@ mod tests { // We cannot test structs here because using them forces the binary encoding to have a // local symbol table and the raw reader interprets that as a different value. - assert!(matches!(reader.next()?, RawStreamItem::EndOfStream)); + assert!(matches!( + reader.next(&allocator)?, + LazyRawStreamItem::::EndOfStream + )); Ok(()) } diff --git a/src/lazy/binary/raw/reader.rs b/src/lazy/binary/raw/reader.rs index ed90caa7..a44b8d14 100644 --- a/src/lazy/binary/raw/reader.rs +++ b/src/lazy/binary/raw/reader.rs @@ -2,10 +2,12 @@ use crate::lazy::binary::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::value::LazyRawBinaryValue; use crate::lazy::decoder::LazyRawReader; use crate::lazy::encoding::BinaryEncoding_1_0; -use crate::lazy::raw_stream_item::RawStreamItem; +use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; use crate::result::IonFailure; use crate::IonResult; +use bumpalo::Bump as BumpAllocator; + /// A binary Ion 1.0 reader that yields [`LazyRawBinaryValue`]s representing the top level values found /// in the provided input stream. pub struct LazyRawBinaryReader<'data> { @@ -29,10 +31,13 @@ impl<'data> LazyRawBinaryReader<'data> { /// Helper method called by [`Self::next`]. Reads the current stream item as an Ion version /// marker. If the version is not 1.0, returns an [`crate::IonError::Decoding`]. - fn read_ivm( + fn read_ivm<'top>( &mut self, buffer: ImmutableBuffer<'data>, - ) -> IonResult> { + ) -> IonResult> + where + 'data: 'top, + { let ((major, minor), _buffer_after_ivm) = buffer.read_ivm()?; if (major, minor) != (1, 0) { return IonResult::decoding_error(format!( @@ -42,29 +47,32 @@ impl<'data> LazyRawBinaryReader<'data> { } self.data.buffer = buffer; self.data.bytes_to_skip = 4; // IVM length - return Ok(RawStreamItem::VersionMarker(1, 0)); + Ok(LazyRawStreamItem::::VersionMarker(1, 0)) } - fn read_value( + fn read_value<'top>( &mut self, buffer: ImmutableBuffer<'data>, - ) -> IonResult> { + ) -> IonResult> + where + 'data: 'top, + { let lazy_value = match ImmutableBuffer::peek_sequence_value(buffer)? { Some(lazy_value) => lazy_value, - None => return Ok(RawStreamItem::EndOfStream), + None => return Ok(LazyRawStreamItem::::EndOfStream), }; self.data.buffer = buffer; self.data.bytes_to_skip = lazy_value.encoded_value.total_length(); Ok(RawStreamItem::Value(lazy_value)) } - pub fn next<'top>(&'top mut self) -> IonResult> + pub fn next<'top>(&'top mut self) -> IonResult> where 'data: 'top, { let mut buffer = self.data.advance_to_next_item()?; if buffer.is_empty() { - return Ok(RawStreamItem::EndOfStream); + return Ok(LazyRawStreamItem::::EndOfStream); } let type_descriptor = buffer.peek_type_descriptor()?; if type_descriptor.is_nop() { @@ -82,7 +90,13 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_0> for LazyRawBinaryReader<'da LazyRawBinaryReader::new(data) } - fn next<'a>(&'a mut self) -> IonResult> { + fn next<'top>( + &'top mut self, + _allocator: &'top BumpAllocator, + ) -> IonResult> + where + 'data: 'top, + { self.next() } } @@ -230,11 +244,14 @@ mod tests { )?; let mut reader = LazyRawBinaryReader::new(data); loop { + use RawStreamItem::*; match reader.next()? { - RawStreamItem::VersionMarker(major, minor) => println!("IVM: v{}.{}", major, minor), - RawStreamItem::Value(value) => println!("{:?}", value.read()?), - RawStreamItem::EndOfStream => break, - RawStreamItem::EExpression(_) => unreachable!("No macros in Ion 1.0"), + VersionMarker(major, minor) => { + println!("IVM: v{}.{}", major, minor) + } + Value(value) => println!("{:?}", value.read()?), + EndOfStream => break, + EExpression(_) => unreachable!("No macros in Ion 1.0"), } } Ok(()) diff --git a/src/lazy/binary/raw/sequence.rs b/src/lazy/binary/raw/sequence.rs index 455d453b..1fb86705 100644 --- a/src/lazy/binary/raw/sequence.rs +++ b/src/lazy/binary/raw/sequence.rs @@ -9,27 +9,27 @@ use crate::{IonResult, IonType}; use std::fmt::{Debug, Formatter}; #[derive(Debug, Copy, Clone)] -pub struct LazyRawBinaryList<'data> { - pub(crate) sequence: LazyRawBinarySequence<'data>, +pub struct LazyRawBinaryList<'top> { + pub(crate) sequence: LazyRawBinarySequence<'top>, } #[derive(Debug, Copy, Clone)] -pub struct LazyRawBinarySExp<'data> { - pub(crate) sequence: LazyRawBinarySequence<'data>, +pub struct LazyRawBinarySExp<'top> { + pub(crate) sequence: LazyRawBinarySequence<'top>, } -impl<'data> LazyContainerPrivate<'data, BinaryEncoding_1_0> for LazyRawBinaryList<'data> { - fn from_value(value: LazyRawBinaryValue<'data>) -> Self { +impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryList<'top> { + fn from_value(value: LazyRawBinaryValue<'top>) -> Self { LazyRawBinaryList { sequence: LazyRawBinarySequence { value }, } } } -impl<'data> LazyRawSequence<'data, BinaryEncoding_1_0> for LazyRawBinaryList<'data> { - type Iterator = RawBinarySequenceIterator<'data>; +impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinaryList<'top> { + type Iterator = RawBinarySequenceIterator<'top>; - fn annotations(&self) -> RawBinaryAnnotationsIterator<'data> { + fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { self.sequence.value.annotations() } @@ -41,23 +41,23 @@ impl<'data> LazyRawSequence<'data, BinaryEncoding_1_0> for LazyRawBinaryList<'da self.sequence.iter() } - fn as_value(&self) -> LazyRawBinaryValue<'data> { + fn as_value(&self) -> LazyRawBinaryValue<'top> { self.sequence.value } } -impl<'data> LazyContainerPrivate<'data, BinaryEncoding_1_0> for LazyRawBinarySExp<'data> { - fn from_value(value: LazyRawBinaryValue<'data>) -> Self { +impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinarySExp<'top> { + fn from_value(value: LazyRawBinaryValue<'top>) -> Self { LazyRawBinarySExp { sequence: LazyRawBinarySequence { value }, } } } -impl<'data> LazyRawSequence<'data, BinaryEncoding_1_0> for LazyRawBinarySExp<'data> { - type Iterator = RawBinarySequenceIterator<'data>; +impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinarySExp<'top> { + type Iterator = RawBinarySequenceIterator<'top>; - fn annotations(&self) -> RawBinaryAnnotationsIterator<'data> { + fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { self.sequence.value.annotations() } @@ -69,22 +69,22 @@ impl<'data> LazyRawSequence<'data, BinaryEncoding_1_0> for LazyRawBinarySExp<'da self.sequence.iter() } - fn as_value(&self) -> LazyRawBinaryValue<'data> { + fn as_value(&self) -> LazyRawBinaryValue<'top> { self.sequence.value } } #[derive(Copy, Clone)] -pub struct LazyRawBinarySequence<'data> { - pub(crate) value: LazyRawBinaryValue<'data>, +pub struct LazyRawBinarySequence<'top> { + pub(crate) value: LazyRawBinaryValue<'top>, } -impl<'data> LazyRawBinarySequence<'data> { +impl<'top> LazyRawBinarySequence<'top> { pub fn ion_type(&self) -> IonType { self.value.ion_type() } - pub fn iter(&self) -> RawBinarySequenceIterator<'data> { + pub fn iter(&self) -> RawBinarySequenceIterator<'top> { // Get as much of the sequence's body as is available in the input buffer. // Reading a child value may fail as `Incomplete` let buffer_slice = self.value.available_body(); @@ -92,9 +92,9 @@ impl<'data> LazyRawBinarySequence<'data> { } } -impl<'a, 'data> IntoIterator for &'a LazyRawBinarySequence<'data> { - type Item = IonResult>; - type IntoIter = RawBinarySequenceIterator<'data>; +impl<'a, 'top> IntoIterator for &'a LazyRawBinarySequence<'top> { + type Item = IonResult>; + type IntoIter = RawBinarySequenceIterator<'top>; fn into_iter(self) -> Self::IntoIter { self.iter() @@ -125,20 +125,20 @@ impl<'a> Debug for LazyRawBinarySequence<'a> { } } -pub struct RawBinarySequenceIterator<'data> { - source: DataSource<'data>, +pub struct RawBinarySequenceIterator<'top> { + source: DataSource<'top>, } -impl<'data> RawBinarySequenceIterator<'data> { - pub(crate) fn new(input: ImmutableBuffer<'data>) -> RawBinarySequenceIterator<'data> { +impl<'top> RawBinarySequenceIterator<'top> { + pub(crate) fn new(input: ImmutableBuffer<'top>) -> RawBinarySequenceIterator<'top> { RawBinarySequenceIterator { source: DataSource::new(input), } } } -impl<'data> Iterator for RawBinarySequenceIterator<'data> { - type Item = IonResult>; +impl<'top> Iterator for RawBinarySequenceIterator<'top> { + type Item = IonResult>; fn next(&mut self) -> Option { match self diff --git a/src/lazy/binary/raw/struct.rs b/src/lazy/binary/raw/struct.rs index d7c9a103..39cf3702 100644 --- a/src/lazy/binary/raw/struct.rs +++ b/src/lazy/binary/raw/struct.rs @@ -15,20 +15,20 @@ use crate::lazy::encoding::BinaryEncoding_1_0; use crate::{IonResult, RawSymbolTokenRef}; #[derive(Copy, Clone)] -pub struct LazyRawBinaryStruct<'data> { - pub(crate) value: LazyRawBinaryValue<'data>, +pub struct LazyRawBinaryStruct<'top> { + pub(crate) value: LazyRawBinaryValue<'top>, } -impl<'a, 'data> IntoIterator for &'a LazyRawBinaryStruct<'data> { - type Item = IonResult>; - type IntoIter = RawBinaryStructIterator<'data>; +impl<'a, 'top> IntoIterator for &'a LazyRawBinaryStruct<'top> { + type Item = IonResult>; + type IntoIter = RawBinaryStructIterator<'top>; fn into_iter(self) -> Self::IntoIter { self.iter() } } -impl<'a> Debug for LazyRawBinaryStruct<'a> { +impl<'top> Debug for LazyRawBinaryStruct<'top> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "{{")?; for field in self { @@ -41,12 +41,12 @@ impl<'a> Debug for LazyRawBinaryStruct<'a> { } } -impl<'data> LazyRawBinaryStruct<'data> { - fn annotations(&self) -> RawBinaryAnnotationsIterator<'data> { +impl<'top> LazyRawBinaryStruct<'top> { + fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { self.value.annotations() } - pub fn iter(&self) -> RawBinaryStructIterator<'data> { + pub fn iter(&self) -> RawBinaryStructIterator<'top> { // Get as much of the struct's body as is available in the input buffer. // Reading a child value may fail as `Incomplete` let buffer_slice = self.value.available_body(); @@ -54,16 +54,16 @@ impl<'data> LazyRawBinaryStruct<'data> { } } -impl<'data> LazyContainerPrivate<'data, BinaryEncoding_1_0> for LazyRawBinaryStruct<'data> { - fn from_value(value: LazyRawBinaryValue<'data>) -> Self { +impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryStruct<'top> { + fn from_value(value: LazyRawBinaryValue<'top>) -> Self { LazyRawBinaryStruct { value } } } -impl<'data> LazyRawStruct<'data, BinaryEncoding_1_0> for LazyRawBinaryStruct<'data> { - type Iterator = RawBinaryStructIterator<'data>; +impl<'top> LazyRawStruct<'top, BinaryEncoding_1_0> for LazyRawBinaryStruct<'top> { + type Iterator = RawBinaryStructIterator<'top>; - fn annotations(&self) -> RawBinaryAnnotationsIterator<'data> { + fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { self.annotations() } @@ -72,20 +72,20 @@ impl<'data> LazyRawStruct<'data, BinaryEncoding_1_0> for LazyRawBinaryStruct<'da } } -pub struct RawBinaryStructIterator<'data> { - source: DataSource<'data>, +pub struct RawBinaryStructIterator<'top> { + source: DataSource<'top>, } -impl<'data> RawBinaryStructIterator<'data> { - pub(crate) fn new(input: ImmutableBuffer<'data>) -> RawBinaryStructIterator<'data> { +impl<'top> RawBinaryStructIterator<'top> { + pub(crate) fn new(input: ImmutableBuffer<'top>) -> RawBinaryStructIterator<'top> { RawBinaryStructIterator { source: DataSource::new(input), } } } -impl<'data> Iterator for RawBinaryStructIterator<'data> { - type Item = IonResult>; +impl<'top> Iterator for RawBinaryStructIterator<'top> { + type Item = IonResult>; fn next(&mut self) -> Option { match self.source.try_parse_next(ImmutableBuffer::peek_field) { @@ -100,47 +100,47 @@ impl<'data> Iterator for RawBinaryStructIterator<'data> { } #[derive(Copy, Clone)] -pub struct LazyRawBinaryField<'data> { - pub(crate) value: LazyRawBinaryValue<'data>, +pub struct LazyRawBinaryField<'top> { + pub(crate) value: LazyRawBinaryValue<'top>, } -impl<'data> LazyRawBinaryField<'data> { - pub(crate) fn new(value: LazyRawBinaryValue<'data>) -> Self { +impl<'top> LazyRawBinaryField<'top> { + pub(crate) fn new(value: LazyRawBinaryValue<'top>) -> Self { LazyRawBinaryField { value } } - pub fn name(&self) -> RawSymbolTokenRef<'data> { + pub fn name(&self) -> RawSymbolTokenRef<'top> { // We're in a struct field, the field ID must be populated. let field_id = self.value.encoded_value.field_id.unwrap(); RawSymbolTokenRef::SymbolId(field_id) } - pub fn value(&self) -> LazyRawBinaryValue<'data> { + pub fn value(&self) -> LazyRawBinaryValue<'top> { self.value } - pub(crate) fn into_value(self) -> LazyRawBinaryValue<'data> { + pub(crate) fn into_value(self) -> LazyRawBinaryValue<'top> { self.value } } -impl<'data> LazyRawFieldPrivate<'data, BinaryEncoding_1_0> for LazyRawBinaryField<'data> { - fn into_value(self) -> LazyRawBinaryValue<'data> { +impl<'top> LazyRawFieldPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryField<'top> { + fn into_value(self) -> LazyRawBinaryValue<'top> { self.value } } -impl<'data> LazyRawField<'data, BinaryEncoding_1_0> for LazyRawBinaryField<'data> { - fn name(&self) -> RawSymbolTokenRef<'data> { +impl<'top> LazyRawField<'top, BinaryEncoding_1_0> for LazyRawBinaryField<'top> { + fn name(&self) -> RawSymbolTokenRef<'top> { LazyRawBinaryField::name(self) } - fn value(&self) -> LazyRawBinaryValue<'data> { + fn value(&self) -> LazyRawBinaryValue<'top> { self.value() } } -impl<'a> Debug for LazyRawBinaryField<'a> { +impl<'top> Debug for LazyRawBinaryField<'top> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( f, diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs index 205b31aa..6c556c44 100644 --- a/src/lazy/binary/raw/value.rs +++ b/src/lazy/binary/raw/value.rs @@ -29,12 +29,12 @@ use std::{fmt, mem}; /// includes a text definition for these items whenever one exists, see /// [`crate::lazy::value::LazyValue`]. #[derive(Clone, Copy)] -pub struct LazyRawBinaryValue<'data> { +pub struct LazyRawBinaryValue<'top> { pub(crate) encoded_value: EncodedValue, - pub(crate) input: ImmutableBuffer<'data>, + pub(crate) input: ImmutableBuffer<'top>, } -impl<'a> Debug for LazyRawBinaryValue<'a> { +impl<'top> Debug for LazyRawBinaryValue<'top> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( f, @@ -44,10 +44,10 @@ impl<'a> Debug for LazyRawBinaryValue<'a> { } } -type ValueParseResult<'data, F> = IonResult>; +type ValueParseResult<'top, F> = IonResult>; -impl<'data> LazyRawValuePrivate<'data> for LazyRawBinaryValue<'data> { - fn field_name(&self) -> IonResult> { +impl<'top> LazyRawValuePrivate<'top> for LazyRawBinaryValue<'top> { + fn field_name(&self) -> IonResult> { if let Some(field_id) = self.encoded_value.field_id { Ok(RawSymbolTokenRef::SymbolId(field_id)) } else { @@ -58,7 +58,7 @@ impl<'data> LazyRawValuePrivate<'data> for LazyRawBinaryValue<'data> { } } -impl<'data> LazyRawValue<'data, BinaryEncoding_1_0> for LazyRawBinaryValue<'data> { +impl<'top> LazyRawValue<'top, BinaryEncoding_1_0> for LazyRawBinaryValue<'top> { fn ion_type(&self) -> IonType { self.ion_type() } @@ -67,16 +67,16 @@ impl<'data> LazyRawValue<'data, BinaryEncoding_1_0> for LazyRawBinaryValue<'data self.is_null() } - fn annotations(&self) -> RawBinaryAnnotationsIterator<'data> { + fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { self.annotations() } - fn read(&self) -> IonResult> { + fn read(&self) -> IonResult> { self.read() } } -impl<'data> LazyRawBinaryValue<'data> { +impl<'top> LazyRawBinaryValue<'top> { /// Indicates the Ion data type of this value. Calling this method does not require additional /// parsing of the input stream. pub fn ion_type(&self) -> IonType { @@ -94,7 +94,7 @@ impl<'data> LazyRawBinaryValue<'data> { /// Returns an `ImmutableBuffer` that contains the bytes comprising this value's encoded /// annotations sequence. - fn annotations_sequence(&self) -> ImmutableBuffer<'data> { + fn annotations_sequence(&self) -> ImmutableBuffer<'top> { let offset_and_length = self .encoded_value .annotations_sequence_offset() @@ -126,7 +126,7 @@ impl<'data> LazyRawBinaryValue<'data> { } /// Returns an iterator over this value's unresolved annotation symbols. - pub fn annotations(&self) -> RawBinaryAnnotationsIterator<'data> { + pub fn annotations(&self) -> RawBinaryAnnotationsIterator<'top> { RawBinaryAnnotationsIterator::new(self.annotations_sequence()) } @@ -134,7 +134,7 @@ impl<'data> LazyRawBinaryValue<'data> { /// calling this method will not read additional data; the `RawValueRef` will provide a /// [`LazyRawBinarySequence`] or [`LazyStruct`](crate::lazy::struct::LazyStruct) /// that can be traversed to access the container's contents. - pub fn read(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + pub fn read(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { if self.is_null() { let raw_value_ref = RawValueRef::Null(self.ion_type()); return Ok(raw_value_ref); @@ -158,7 +158,7 @@ impl<'data> LazyRawBinaryValue<'data> { } /// Returns the encoded byte slice representing this value's data. - fn value_body(&self) -> IonResult<&'data [u8]> { + fn value_body(&self) -> IonResult<&'top [u8]> { let value_total_length = self.encoded_value.total_length(); if self.input.len() < value_total_length { eprintln!("[value_body] Incomplete {:?}", self); @@ -175,7 +175,7 @@ impl<'data> LazyRawBinaryValue<'data> { /// Returns an [`ImmutableBuffer`] containing whatever bytes of this value's body are currently /// available. This method is used to construct lazy containers, which are not required to be /// fully buffered before reading begins. - pub(crate) fn available_body(&self) -> ImmutableBuffer<'data> { + pub(crate) fn available_body(&self) -> ImmutableBuffer<'top> { let value_total_length = self.encoded_value.total_length(); let value_body_length = self.encoded_value.value_length(); let value_offset = value_total_length - value_body_length; @@ -192,7 +192,7 @@ impl<'data> LazyRawBinaryValue<'data> { } /// Helper method called by [`Self::read`]. Reads the current value as a bool. - fn read_bool(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + fn read_bool(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Bool); let representation = self.encoded_value.header().length_code; let value = match representation { @@ -209,7 +209,7 @@ impl<'data> LazyRawBinaryValue<'data> { } /// Helper method called by [`Self::read`]. Reads the current value as an int. - fn read_int(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + fn read_int(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Int); // `value_body()` returns a buffer starting at the body of the value. // It also confirms that the entire value is in the buffer. @@ -236,7 +236,7 @@ impl<'data> LazyRawBinaryValue<'data> { } /// Helper method called by [`Self::read`]. Reads the current value as a float. - fn read_float(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + fn read_float(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Float); let ieee_bytes = self.value_body()?; let number_of_bytes = self.encoded_value.value_length(); @@ -250,7 +250,7 @@ impl<'data> LazyRawBinaryValue<'data> { } /// Helper method called by [`Self::read`]. Reads the current value as a decimal. - fn read_decimal(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + fn read_decimal(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Decimal); if self.encoded_value.value_length() == 0 { @@ -277,7 +277,7 @@ impl<'data> LazyRawBinaryValue<'data> { } /// Helper method called by [`Self::read`]. Reads the current value as a timestamp. - fn read_timestamp(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + fn read_timestamp(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Timestamp); let input = ImmutableBuffer::new(self.value_body()?); @@ -387,14 +387,14 @@ impl<'data> LazyRawBinaryValue<'data> { } /// Helper method called by [`Self::read`]. Reads the current value as a symbol. - fn read_symbol(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + fn read_symbol(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Symbol); self.read_symbol_id() .map(|sid| RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(sid))) } /// Helper method called by [`Self::read`]. Reads the current value as a string. - fn read_string(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + fn read_string(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::String); let raw_bytes = self.value_body()?; let text = std::str::from_utf8(raw_bytes) @@ -403,21 +403,21 @@ impl<'data> LazyRawBinaryValue<'data> { } /// Helper method called by [`Self::read`]. Reads the current value as a blob. - fn read_blob(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + fn read_blob(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Blob); let bytes = self.value_body()?; Ok(RawValueRef::Blob(bytes.into())) } /// Helper method called by [`Self::read`]. Reads the current value as a clob. - fn read_clob(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + fn read_clob(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Clob); let bytes = self.value_body()?; Ok(RawValueRef::Clob(bytes.into())) } /// Helper method called by [`Self::read`]. Reads the current value as an S-expression. - fn read_sexp(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + fn read_sexp(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::SExp); let lazy_value = LazyRawBinaryValue { encoded_value: self.encoded_value, @@ -431,7 +431,7 @@ impl<'data> LazyRawBinaryValue<'data> { } /// Helper method called by [`Self::read`]. Reads the current value as a list. - fn read_list(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + fn read_list(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::List); let lazy_value = LazyRawBinaryValue { encoded_value: self.encoded_value, @@ -445,7 +445,7 @@ impl<'data> LazyRawBinaryValue<'data> { } /// Helper method called by [`Self::read`]. Reads the current value as a struct. - fn read_struct(&self) -> ValueParseResult<'data, BinaryEncoding_1_0> { + fn read_struct(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Struct); let lazy_value = LazyRawBinaryValue { encoded_value: self.encoded_value, diff --git a/src/lazy/decoder.rs b/src/lazy/decoder.rs index 8ed66736..c12a1130 100644 --- a/src/lazy/decoder.rs +++ b/src/lazy/decoder.rs @@ -1,38 +1,37 @@ use std::fmt::Debug; -use crate::lazy::encoding::TextEncoding_1_1; -use crate::lazy::expanded::macro_evaluator::MacroInvocation; -use crate::lazy::raw_stream_item::RawStreamItem; +use bumpalo::Bump as BumpAllocator; + +use crate::lazy::expanded::macro_evaluator::RawEExpression; +use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; -use crate::lazy::text::raw::v1_1::reader::{ - MacroIdRef, RawTextMacroInvocation, RawTextSExpIterator_1_1, -}; use crate::result::IonFailure; use crate::{IonResult, IonType, RawSymbolTokenRef}; /// A family of types that collectively comprise the lazy reader API for an Ion serialization /// format. These types operate at the 'raw' level; they do not attempt to resolve symbols /// using the active symbol table. -pub trait LazyDecoder<'data>: Sized + Debug + Clone -where - Self: 'data, -{ +// Implementations of this trait are typically unit structs that are never instantiated. +// However, many types are generic over some `D: LazyDecoder`, and having this trait +// extend 'static, Sized, Debug, Clone and Copy means that those types can #[derive(...)] +// those traits themselves without boilerplate `where` clauses. +pub trait LazyDecoder: 'static + Sized + Debug + Clone + Copy { /// A lazy reader that yields [`Self::Value`]s representing the top level values in its input. - type Reader: LazyRawReader<'data, Self>; + type Reader<'data>: LazyRawReader<'data, Self>; /// A value (at any depth) in the input. This can be further inspected to access either its /// scalar data or, if it is a container, to view it as [`Self::List`], [`Self::SExp`] or /// [`Self::Struct`]. - type Value: LazyRawValue<'data, Self>; + type Value<'top>: LazyRawValue<'top, Self>; /// A list whose child values may be accessed iteratively. - type SExp: LazyRawSequence<'data, Self>; + type SExp<'top>: LazyRawSequence<'top, Self>; /// An s-expression whose child values may be accessed iteratively. - type List: LazyRawSequence<'data, Self>; + type List<'top>: LazyRawSequence<'top, Self>; /// A struct whose fields may be accessed iteratively or by field name. - type Struct: LazyRawStruct<'data, Self>; + type Struct<'top>: LazyRawStruct<'top, Self>; /// An iterator over the annotations on the input stream's values. - type AnnotationsIterator: Iterator>>; + type AnnotationsIterator<'top>: Iterator>>; /// An e-expression invoking a macro. (Ion 1.1+) - type MacroInvocation: MacroInvocation<'data, Self>; + type EExpression<'top>: RawEExpression<'top, Self>; } /// An expression found in value position in either serialized Ion or a template. @@ -41,7 +40,7 @@ where /// /// When working with `RawValueExpr`s that always use a given decoder's `Value` and /// `MacroInvocation` associated types, consider using [`LazyRawValueExpr`] instead. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, PartialEq)] pub enum RawValueExpr { /// A value literal. For example: `5`, `foo`, or `"hello"` in text. ValueLiteral(V), @@ -60,8 +59,8 @@ pub enum RawValueExpr { /// /// For a version of this type that is not constrained to a particular encoding, see /// [`RawValueExpr`]. -pub type LazyRawValueExpr<'data, D> = - RawValueExpr<>::Value, >::MacroInvocation>; +pub type LazyRawValueExpr<'top, D> = + RawValueExpr<::Value<'top>, ::EExpression<'top>>; impl RawValueExpr { pub fn expect_value(self) -> IonResult { @@ -89,9 +88,9 @@ impl RawValueExpr { /// * a name/value pair (as it is in Ion 1.0) /// * a name/e-expression pair /// * an e-expression -#[derive(Debug)] -pub enum RawFieldExpr<'name, V, M> { - NameValuePair(RawSymbolTokenRef<'name>, RawValueExpr), +#[derive(Clone, Debug)] +pub enum RawFieldExpr<'top, V, M> { + NameValuePair(RawSymbolTokenRef<'top>, RawValueExpr), MacroInvocation(M), } @@ -101,11 +100,8 @@ pub enum RawFieldExpr<'name, V, M> { /// An item found in struct field position an Ion data stream written in the encoding represented /// by the LazyDecoder `D`. -pub type LazyRawFieldExpr<'data, D> = RawFieldExpr< - 'data, - >::Value, - >::MacroInvocation, ->; +pub type LazyRawFieldExpr<'top, D> = + RawFieldExpr<'top, ::Value<'top>, ::EExpression<'top>>; impl<'name, V: Debug, M: Debug> RawFieldExpr<'name, V, M> { pub fn expect_name_value(self) -> IonResult<(RawSymbolTokenRef<'name>, V)> { @@ -157,77 +153,69 @@ pub(crate) mod private { use super::LazyDecoder; - pub trait LazyRawFieldPrivate<'data, D: LazyDecoder<'data>> { + pub trait LazyRawFieldPrivate<'top, D: LazyDecoder> { /// Converts the `LazyRawField` impl to a `LazyRawValue` impl. // At the moment, `LazyRawField`s are just thin wrappers around a `LazyRawValue` that can // safely assume that the value has a field name associated with it. This method allows // us to convert from one to the other when needed. - fn into_value(self) -> D::Value; + fn into_value(self) -> D::Value<'top>; } - pub trait LazyContainerPrivate<'data, D: LazyDecoder<'data>> { + pub trait LazyContainerPrivate<'top, D: LazyDecoder> { /// Constructs a new lazy raw container from a lazy raw value that has been confirmed to be /// of the correct type. - fn from_value(value: D::Value) -> Self; + fn from_value(value: D::Value<'top>) -> Self; } - pub trait LazyRawValuePrivate<'data>: RawValueLiteral { + pub trait LazyRawValuePrivate<'top>: RawValueLiteral { /// Returns the field name associated with this value. If the value is not inside a struct, /// returns `IllegalOperation`. - fn field_name(&self) -> IonResult>; - } -} - -impl<'data> MacroInvocation<'data, TextEncoding_1_1> for RawTextMacroInvocation<'data> { - type ArgumentExpr = LazyRawValueExpr<'data, TextEncoding_1_1>; - type ArgumentsIterator = RawTextSExpIterator_1_1<'data>; - - fn id(&self) -> MacroIdRef { - self.id - } - - fn arguments(&self) -> Self::ArgumentsIterator { - RawTextSExpIterator_1_1::new(self.arguments_bytes()) + fn field_name(&self) -> IonResult>; } } -pub trait LazyRawReader<'data, D: LazyDecoder<'data>> { +pub trait LazyRawReader<'data, D: LazyDecoder> { fn new(data: &'data [u8]) -> Self; - fn next<'a>(&'a mut self) -> IonResult>; + fn next<'top>( + &'top mut self, + allocator: &'top BumpAllocator, + ) -> IonResult> + where + 'data: 'top; } -pub trait LazyRawValue<'data, D: LazyDecoder<'data>>: - private::LazyRawValuePrivate<'data> + Copy + Clone + Debug +pub trait LazyRawValue<'top, D: LazyDecoder>: + private::LazyRawValuePrivate<'top> + Copy + Clone + Debug { fn ion_type(&self) -> IonType; fn is_null(&self) -> bool; - fn annotations(&self) -> D::AnnotationsIterator; - fn read(&self) -> IonResult>; + fn annotations(&self) -> D::AnnotationsIterator<'top>; + fn read(&self) -> IonResult>; } -pub trait LazyRawSequence<'data, D: LazyDecoder<'data>>: - private::LazyContainerPrivate<'data, D> + Debug + Copy + Clone +pub trait LazyRawSequence<'top, D: LazyDecoder>: + private::LazyContainerPrivate<'top, D> + Debug + Copy + Clone { - type Iterator: Iterator>>; - fn annotations(&self) -> D::AnnotationsIterator; + type Iterator: Iterator>>; + fn annotations(&self) -> D::AnnotationsIterator<'top>; fn ion_type(&self) -> IonType; fn iter(&self) -> Self::Iterator; - fn as_value(&self) -> D::Value; + fn as_value(&self) -> D::Value<'top>; } -pub trait LazyRawStruct<'data, D: LazyDecoder<'data>>: - private::LazyContainerPrivate<'data, D> + Debug + Copy + Clone +pub trait LazyRawStruct<'top, D: LazyDecoder>: + private::LazyContainerPrivate<'top, D> + Debug + Copy + Clone { - type Iterator: Iterator>>; + type Iterator: Iterator>>; - fn annotations(&self) -> D::AnnotationsIterator; + fn annotations(&self) -> D::AnnotationsIterator<'top>; fn iter(&self) -> Self::Iterator; } -pub trait LazyRawField<'data, D: LazyDecoder<'data>>: - private::LazyRawFieldPrivate<'data, D> + Debug +pub trait LazyRawField<'top, D: LazyDecoder>: + private::LazyRawFieldPrivate<'top, D> + Debug { - fn name(&self) -> RawSymbolTokenRef<'data>; - fn value(&self) -> D::Value; + fn name(&self) -> RawSymbolTokenRef<'top>; + fn value(&self) -> D::Value<'top>; } diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index 5769827a..0d02542f 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -13,7 +13,7 @@ use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::lazy::text::raw::sequence::{LazyRawTextList_1_0, LazyRawTextSExp_1_0}; use crate::lazy::text::raw::v1_1::reader::{ LazyRawTextList_1_1, LazyRawTextReader_1_1, LazyRawTextSExp_1_1, LazyRawTextStruct_1_1, - RawTextMacroInvocation, + RawTextEExpression_1_1, }; use crate::lazy::text::value::{ LazyRawTextValue, LazyRawTextValue_1_0, LazyRawTextValue_1_1, MatchedRawTextValue, @@ -63,24 +63,24 @@ impl Encoding for TextEncoding_1_1 { pub trait BinaryEncoding: Encoding {} /// Marker trait for text encodings. -pub trait TextEncoding<'data>: - Encoding + LazyDecoder<'data, AnnotationsIterator = RawTextAnnotationsIterator<'data>> +pub trait TextEncoding<'top>: + Encoding + LazyDecoder = RawTextAnnotationsIterator<'top>> { fn value_from_matched( - matched: MatchedRawTextValue<'data>, - ) -> >::Value; + matched: MatchedRawTextValue<'top, Self>, + ) -> ::Value<'top>; } -impl<'data> TextEncoding<'data> for TextEncoding_1_0 { +impl<'top> TextEncoding<'top> for TextEncoding_1_0 { fn value_from_matched( - matched: MatchedRawTextValue<'data>, - ) -> >::Value { + matched: MatchedRawTextValue<'_, Self>, + ) -> ::Value<'_> { LazyRawTextValue_1_0::from(matched) } } -impl<'data> TextEncoding<'data> for TextEncoding_1_1 { +impl<'top> TextEncoding<'top> for TextEncoding_1_1 { fn value_from_matched( - matched: MatchedRawTextValue<'data>, - ) -> >::Value { + matched: MatchedRawTextValue<'_, Self>, + ) -> ::Value<'_> { LazyRawTextValue_1_1::from(matched) } } @@ -89,36 +89,36 @@ impl<'data> TextEncoding<'data> for TextEncoding_1_1 { pub trait EncodingWithMacroSupport {} impl EncodingWithMacroSupport for TextEncoding_1_1 {} -impl<'data> LazyDecoder<'data> for BinaryEncoding_1_0 { - type Reader = LazyRawBinaryReader<'data>; - type Value = LazyRawBinaryValue<'data>; - type SExp = LazyRawBinarySExp<'data>; - type List = LazyRawBinaryList<'data>; - type Struct = LazyRawBinaryStruct<'data>; - type AnnotationsIterator = RawBinaryAnnotationsIterator<'data>; +impl LazyDecoder for BinaryEncoding_1_0 { + type Reader<'data> = LazyRawBinaryReader<'data>; + type Value<'top> = LazyRawBinaryValue<'top>; + type SExp<'top> = LazyRawBinarySExp<'top>; + type List<'top> = LazyRawBinaryList<'top>; + type Struct<'top> = LazyRawBinaryStruct<'top>; + type AnnotationsIterator<'top> = RawBinaryAnnotationsIterator<'top>; // Macros are not supported in Ion 1.0 - type MacroInvocation = Never; + type EExpression<'top> = Never; } -impl<'data> LazyDecoder<'data> for TextEncoding_1_0 { - type Reader = LazyRawTextReader_1_0<'data>; - type Value = LazyRawTextValue_1_0<'data>; - type SExp = LazyRawTextSExp_1_0<'data>; - type List = LazyRawTextList_1_0<'data>; - type Struct = LazyRawTextStruct_1_0<'data>; - type AnnotationsIterator = RawTextAnnotationsIterator<'data>; +impl LazyDecoder for TextEncoding_1_0 { + type Reader<'data> = LazyRawTextReader_1_0<'data>; + type Value<'top> = LazyRawTextValue_1_0<'top>; + type SExp<'top> = LazyRawTextSExp_1_0<'top>; + type List<'top> = LazyRawTextList_1_0<'top>; + type Struct<'top> = LazyRawTextStruct_1_0<'top>; + type AnnotationsIterator<'top> = RawTextAnnotationsIterator<'top>; // Macros are not supported in Ion 1.0 - type MacroInvocation = Never; + type EExpression<'top> = Never; } -impl<'data> LazyDecoder<'data> for TextEncoding_1_1 { - type Reader = LazyRawTextReader_1_1<'data>; - type Value = LazyRawTextValue_1_1<'data>; - type SExp = LazyRawTextSExp_1_1<'data>; - type List = LazyRawTextList_1_1<'data>; - type Struct = LazyRawTextStruct_1_1<'data>; - type AnnotationsIterator = RawTextAnnotationsIterator<'data>; - type MacroInvocation = RawTextMacroInvocation<'data>; +impl LazyDecoder for TextEncoding_1_1 { + type Reader<'data> = LazyRawTextReader_1_1<'data>; + type Value<'top> = LazyRawTextValue_1_1<'top>; + type SExp<'top> = LazyRawTextSExp_1_1<'top>; + type List<'top> = LazyRawTextList_1_1<'top>; + type Struct<'top> = LazyRawTextStruct_1_1<'top>; + type AnnotationsIterator<'top> = RawTextAnnotationsIterator<'top>; + type EExpression<'top> = RawTextEExpression_1_1<'top>; } /// Marker trait for types that represent value literals in an Ion stream of some encoding. @@ -126,14 +126,14 @@ impl<'data> LazyDecoder<'data> for TextEncoding_1_1 { // `LazyDecoder::Value` to `ExpandedValueSource`. That is: // // impl<'top, 'data, V: RawValueLiteral, D: LazyDecoder<'data, Value = V>> From -// for ExpandedValueSource<'top, 'data, D> +// for ExpandedValueSource<'top, D> // // If we do not confine the implementation to types with a marker trait, rustc complains that // someone may someday use `ExpandedValueSource` as a `LazyDecoder::Value`, and then the // the implementation will conflict with the core `impl From for T` implementation. pub trait RawValueLiteral {} -impl<'data> RawValueLiteral for MatchedRawTextValue<'data> {} -impl<'data, E: TextEncoding<'data>> RawValueLiteral for LazyRawTextValue<'data, E> {} -impl<'data> RawValueLiteral for LazyRawBinaryValue<'data> {} -impl<'data> RawValueLiteral for LazyRawAnyValue<'data> {} +impl<'top, E: TextEncoding<'top>> RawValueLiteral for MatchedRawTextValue<'top, E> {} +impl<'top, E: TextEncoding<'top>> RawValueLiteral for LazyRawTextValue<'top, E> {} +impl<'top> RawValueLiteral for LazyRawBinaryValue<'top> {} +impl<'top> RawValueLiteral for LazyRawAnyValue<'top> {} diff --git a/src/lazy/expanded/compiler.rs b/src/lazy/expanded/compiler.rs new file mode 100644 index 00000000..26bfcbe1 --- /dev/null +++ b/src/lazy/expanded/compiler.rs @@ -0,0 +1,733 @@ +//! Compiles template definition language (TDL) expressions into a form suitable for fast incremental +//! evaluation. +use std::ops::Range; + +use crate::lazy::decoder::LazyDecoder; +use crate::lazy::expanded::template::{ + ExprRange, MacroSignature, Parameter, ParameterEncoding, TemplateBody, TemplateBodyElement, + TemplateBodyMacroInvocation, TemplateBodyValueExpr, TemplateMacro, TemplateValue, +}; +use crate::lazy::expanded::EncodingContext; +use crate::lazy::r#struct::LazyStruct; +use crate::lazy::reader::LazyTextReader_1_1; +use crate::lazy::sequence::{LazyList, LazySExp}; +use crate::lazy::value::LazyValue; +use crate::lazy::value_ref::ValueRef; +use crate::result::IonFailure; +use crate::symbol_ref::AsSymbolRef; +use crate::{IonError, IonResult, IonType, SymbolRef}; + +/// Validates a given TDL expression and compiles it into a [`TemplateMacro`] that can be added +/// to a [`MacroTable`](crate::lazy::expanded::macro_table::MacroTable). +pub struct TemplateCompiler {} + +impl TemplateCompiler { + /// Takes a TDL expression in the form: + /// ```ion_1_1 + /// (macro name (param1 param2 [...] paramN) body) + /// ``` + /// and compiles it into a [`TemplateMacro`]. + /// + /// The [`TemplateMacro`] stores a sequence of [`TemplateBodyValueExpr`]s that need to be evaluated + /// in turn. Each step is either a value literal, a reference to one of the parameters (that is: + /// a variable), or a macro invocation. + /// + /// Expressions that contain other expressions (i.e. containers and macro invocations) each + /// store the range of subexpressions that they contain, allowing a reader to skip the entire + /// parent expression as desired. For example, in this macro: + /// + /// ```ion_1_1 + /// (macro foo () + /// // Template body expressions + /// [ // #0, contains expressions 1..=4 + /// 1, // #1 + /// (values // #2, contains expressions 3..=4 + /// 2 // #3 + /// 3 // #4 + /// ) + /// ] + /// ) + /// ``` + /// + /// the step corresponding to `(values 2 3)` would store the range `3..=4`, indicating that + /// it contains template body expressions number `3` and `4`. A reader wishing to skip that call + /// to `values` could do so by moving ahead to expression number `5`. The outer + /// list (`[1, (values 2 3)]`) would store a `1..=4`, indicating that it contains the `1`, + /// the a macro invocation `values`, and the two arguments that belong to `values`. + /// + /// The compiler recognizes the `(quote expr1 expr2 [...] exprN)` form, adding each subexpression + /// to the template without interpretation. `(quote ...)` does not appear in the compiled + /// template as there is nothing more for it to do at expansion time. + pub fn compile_from_text( + context: EncodingContext, + expression: &str, + ) -> IonResult { + // TODO: This is a rudimentary implementation that panics instead of performing thorough + // validation. Where it does surface errors, the messages are too terse. + let mut reader = LazyTextReader_1_1::new(expression.as_bytes())?; + let invocation = reader.expect_next()?.read()?.expect_sexp()?; + let mut values = invocation.iter(); + + let macro_keyword = values.next().expect("macro ID")?.read()?.expect_symbol()?; + if macro_keyword != "macro" { + return IonResult::decoding_error( + "macro compilation expects a sexp starting with the keyword `macro`", + ); + } + + // TODO: Enforce 'identifier' syntax subset of symbol + // TODO: Syntactic support address IDs like `(:14 ...)` + let template_name = match values.next().expect("template name")?.read()? { + ValueRef::Symbol(s) if s.text().is_none() => { + return IonResult::decoding_error("$0 is not a valid macro name") + } + ValueRef::Symbol(s) => Some(s.text().unwrap().to_owned()), + ValueRef::Null(IonType::Symbol | IonType::Null) => None, + other => { + return IonResult::decoding_error(format!( + "expected identifier as macro name but found: {other:?}" + )) + } + }; + + let params = values + .next() + .expect("parameters sexp")? + .read()? + .expect_sexp()?; + + let mut compiled_params = Vec::new(); + for param_result in ¶ms { + let compiled_param = Parameter::new( + param_result? + .read()? + .expect_symbol()? + .text() + .unwrap() + .to_string(), + ParameterEncoding::Tagged, + ); + compiled_params.push(compiled_param); + } + let signature = MacroSignature::new(compiled_params); + let body = values.next().expect("template body")?; + let mut compiled_body = TemplateBody { + expressions: Vec::new(), + annotations_storage: Vec::new(), + }; + Self::compile_value( + context, + &signature, + &mut compiled_body, + /*is_quoted=*/ false, + body, + )?; + let template_macro = TemplateMacro { + name: template_name, + signature, + body: compiled_body, + }; + Ok(template_macro) + } + + /// Recursively visits all of the expressions in `lazy_value` and adds their corresponding + /// [`TemplateBodyValueExpr`] sequences to the `TemplateBody`. + /// + /// If `is_quoted` is true, nested symbols and s-expressions will not be interpreted. + fn compile_value<'top, D: LazyDecoder>( + context: EncodingContext<'top>, + signature: &MacroSignature, + definition: &mut TemplateBody, + is_quoted: bool, + lazy_value: LazyValue<'top, D>, + ) -> IonResult<()> { + let annotations_range_start = definition.annotations_storage.len(); + for annotation_result in lazy_value.annotations() { + let annotation = annotation_result?; + definition.annotations_storage.push(annotation.to_owned()); + } + let annotations_range_end = definition.annotations_storage.len(); + let annotations_range = annotations_range_start..annotations_range_end; + + let value = match lazy_value.read()? { + ValueRef::Null(ion_type) => TemplateValue::Null(ion_type), + ValueRef::Bool(b) => TemplateValue::Bool(b), + ValueRef::Int(i) => TemplateValue::Int(i), + ValueRef::Float(f) => TemplateValue::Float(f), + ValueRef::Decimal(d) => TemplateValue::Decimal(d), + ValueRef::Timestamp(t) => TemplateValue::Timestamp(t), + ValueRef::String(s) => TemplateValue::String(s.to_owned()), + ValueRef::Symbol(s) if is_quoted => TemplateValue::Symbol(s.to_owned()), + ValueRef::Symbol(s) => { + return Self::compile_variable_reference( + context, + signature, + definition, + annotations_range, + s, + ) + } + ValueRef::Blob(b) => TemplateValue::Blob(b.to_owned()), + ValueRef::Clob(c) => TemplateValue::Clob(c.to_owned()), + ValueRef::SExp(s) => { + return Self::compile_sexp( + context, + signature, + definition, + is_quoted, + annotations_range.clone(), + s, + ); + } + ValueRef::List(l) => { + return Self::compile_list( + context, + signature, + definition, + is_quoted, + annotations_range.clone(), + l, + ) + } + ValueRef::Struct(s) => { + return Self::compile_struct( + context, + signature, + definition, + is_quoted, + annotations_range.clone(), + s, + ) + } + }; + definition.push_element( + TemplateBodyElement::with_value(value).with_annotations(annotations_range), + ); + Ok(()) + } + + /// Helper method for visiting all of the child expressions in a list. + fn compile_list<'top, D: LazyDecoder>( + context: EncodingContext<'top>, + signature: &MacroSignature, + definition: &mut TemplateBody, + is_quoted: bool, + annotations_range: Range, + lazy_list: LazyList<'top, D>, + ) -> IonResult<()> { + let list_element_index = definition.expressions.len(); + // Assume the list contains zero expressions to start, we'll update this at the end + let list_element = TemplateBodyElement::with_value(TemplateValue::List(ExprRange::empty())); + definition.push_element(list_element); + let list_children_start = definition.expressions.len(); + for value_result in &lazy_list { + let value = value_result?; + Self::compile_value(context, signature, definition, is_quoted, value)?; + } + let list_children_end = definition.expressions.len(); + // Update the list entry to reflect the number of child expressions it contains + let list_element = TemplateBodyElement::with_value(TemplateValue::List(ExprRange::new( + list_children_start..list_children_end, + ))) + .with_annotations(annotations_range); + definition.expressions[list_element_index] = TemplateBodyValueExpr::Element(list_element); + Ok(()) + } + + /// Helper method for visiting all of the child expressions in a sexp. + fn compile_sexp<'top, D: LazyDecoder>( + context: EncodingContext<'top>, + signature: &MacroSignature, + definition: &mut TemplateBody, + is_quoted: bool, + annotations_range: Range, + lazy_sexp: LazySExp<'top, D>, + ) -> IonResult<()> { + if is_quoted { + // If `is_quoted` is true, this s-expression is nested somewhere inside a `(quote ...)` + // macro invocation. The sexp and its child expressions can be added to the TemplateBody + // without interpretation. + Self::compile_quoted_sexp(context, signature, definition, annotations_range, lazy_sexp) + } else { + // If `is_quoted` is false, the sexp is a macro invocation. + // First, verify that it doesn't have annotations. + if !annotations_range.is_empty() { + return IonResult::decoding_error("found annotations on a macro invocation"); + } + // Peek at the first expression in the sexp. If it's the symbol `quoted`... + if Self::sexp_is_quote_macro(&lazy_sexp)? { + // ...then we set `is_quoted` to true and compile all of its child expressions. + Self::compile_quoted_elements(context, signature, definition, lazy_sexp) + } else { + // Otherwise, add the macro invocation to the template body. + Self::compile_macro(context, signature, definition, lazy_sexp) + } + }?; + + Ok(()) + } + + /// Adds a `lazy_sexp` that has been determined to represent a macro invocation to the + /// TemplateBody. + fn compile_macro<'top, D: LazyDecoder>( + context: EncodingContext<'top>, + signature: &MacroSignature, + definition: &mut TemplateBody, + lazy_sexp: LazySExp<'top, D>, + ) -> IonResult<()> { + let mut expressions = lazy_sexp.iter(); + // Convert the macro ID (name or address) into an address. If this refers to a macro that + // doesn't exist yet, this will return an error. This prevents recursion. + // TODO: Consider storing the name of the invoked target macro in the host's definition + // as debug information. The name cannot be stored directly on the + // TemplateBodyMacroInvocation as that would prevent the type from being `Copy`. + let (_maybe_name, macro_address) = + Self::name_and_address_from_id_expr(context, expressions.next())?; + let macro_step_index = definition.expressions.len(); + // Assume the macro contains zero argument expressions to start, we'll update + // this at the end of the function. + definition.push_macro_invocation(macro_address, ExprRange::empty()); + let arguments_start = definition.expressions.len(); + for argument_result in expressions { + let argument = argument_result?; + Self::compile_value( + context, signature, definition, /*is_quoted=*/ false, argument, + )?; + } + let arguments_end = definition.expressions.len(); + // Update the macro step to reflect the macro's address and number of child expressions it + // contains + let template_macro_invocation = TemplateBodyMacroInvocation::new( + macro_address, + ExprRange::new(arguments_start..arguments_end), + ); + definition.expressions[macro_step_index] = + TemplateBodyValueExpr::MacroInvocation(template_macro_invocation); + Ok(()) + } + + /// Given a `LazyValue` that represents a macro ID (name or address), attempts to resolve the + /// ID to a macro address. + fn name_and_address_from_id_expr<'top, D: LazyDecoder>( + context: EncodingContext<'top>, + id_expr: Option>>, + ) -> IonResult<(Option, usize)> { + match id_expr { + None => IonResult::decoding_error("found an empty s-expression in an unquoted context"), + Some(Err(e)) => Err(e), + Some(Ok(value)) => match value.read()? { + ValueRef::Symbol(s) => { + if let Some(name) = s.text() { + let address = + context.macro_table.address_for_name(name).ok_or_else(|| { + IonError::decoding_error(format!("unrecognized macro name: {name}")) + })?; + Ok((Some(name.to_string()), address)) + } else { + IonResult::decoding_error("macro names must be an identifier") + } + } + ValueRef::Int(int) => { + let address = usize::try_from(int.expect_i64()?).map_err(|_| { + IonError::decoding_error(format!("found an invalid macro address: {int}")) + })?; + if context.macro_table.macro_at_address(address).is_none() { + IonResult::decoding_error(format!( + "invocation of invalid macro address {address}" + )) + } else { + Ok((None, address)) + } + } + other => IonResult::decoding_error(format!( + "expected a macro name (symbol) or address (int), but found: {other:?}" + )), + }, + } + } + + /// Visits all of the child expressions of `lazy_sexp`, adding them to the `TemplateBody` + /// without interpretation. `lazy_sexp` itself is the `quote` macro, and does not get added + /// to the template body as there is nothing more for it to do at evaluation time. + fn compile_quoted_elements<'top, D: LazyDecoder>( + context: EncodingContext<'top>, + signature: &MacroSignature, + definition: &mut TemplateBody, + lazy_sexp: LazySExp<'top, D>, + ) -> IonResult<()> { + let mut elements = lazy_sexp.iter(); + // If this method is called, we've already peeked at the first element to confirm that + // it's the symbol `quote`. We can discard it. + let _ = elements.next().unwrap()?; + for element_result in elements { + Self::compile_value( + context, + signature, + definition, + /*is_quoted=*/ true, + element_result?, + )?; + } + Ok(()) + } + + /// Adds `lazy_sexp` to the template body without interpretation. + fn compile_quoted_sexp<'top, D: LazyDecoder>( + context: EncodingContext<'top>, + signature: &MacroSignature, + definition: &mut TemplateBody, + annotations_range: Range, + lazy_sexp: LazySExp<'top, D>, + ) -> IonResult<()> { + let sexp_element_index = definition.expressions.len(); + // Assume the sexp contains zero expressions to start, we'll update this at the end + let sexp_element = TemplateBodyElement::with_value(TemplateValue::SExp(ExprRange::empty())); + definition.push_element(sexp_element); + let sexp_children_start = definition.expressions.len(); + for value_result in &lazy_sexp { + let value = value_result?; + Self::compile_value( + context, signature, definition, /*is_quoted=*/ true, value, + )?; + } + let sexp_children_end = definition.expressions.len(); + let sexp_element = TemplateBodyElement::with_value(TemplateValue::SExp(ExprRange::new( + sexp_children_start..sexp_children_end, + ))) + .with_annotations(annotations_range); + // Update the sexp entry to reflect the number of child expressions it contains + definition.expressions[sexp_element_index] = TemplateBodyValueExpr::Element(sexp_element); + Ok(()) + } + + /// Returns `Ok(true)` if the first child value in the `LazySexp` is the symbol `quote`. + /// This method should only be called in an unquoted context. + fn sexp_is_quote_macro(sexp: &LazySExp) -> IonResult { + let first_expr = sexp.iter().next(); + match first_expr { + // If the sexp is empty and we're not in a quoted context, that's an error. + None => IonResult::decoding_error("found an empty s-expression in an unquoted context"), + Some(Err(e)) => Err(e), + Some(Ok(lazy_value)) => { + let value = lazy_value.read()?; + Ok(value == ValueRef::Symbol("quote".as_symbol_ref())) + } + } + } + + /// Recursively adds all of the expressions in `lazy_struct` to the `TemplateBody`. + fn compile_struct<'top, D: LazyDecoder>( + context: EncodingContext<'top>, + signature: &MacroSignature, + definition: &mut TemplateBody, + is_quoted: bool, + annotations_range: Range, + lazy_struct: LazyStruct<'top, D>, + ) -> IonResult<()> { + let struct_element_index = definition.expressions.len(); + // Assume the struct contains zero expressions to start, we'll update this at the end + let struct_element = + TemplateBodyElement::with_value(TemplateValue::Struct(ExprRange::empty())); + definition.push_element(struct_element); + let struct_start = definition.expressions.len(); + for field_result in &lazy_struct { + let field = field_result?; + let name = field.name()?.to_owned(); + let name_element = TemplateBodyElement::with_value(TemplateValue::Symbol(name)); + definition.push_element(name_element); + Self::compile_value(context, signature, definition, is_quoted, field.value())?; + } + let struct_end = definition.expressions.len(); + // Update the struct entry to reflect the range of expansion steps it contains. + let struct_element = TemplateBodyElement::with_value(TemplateValue::Struct( + ExprRange::new(struct_start..struct_end), + )) + .with_annotations(annotations_range); + definition.expressions[struct_element_index] = + TemplateBodyValueExpr::Element(struct_element); + Ok(()) + } + + /// Resolves `variable` to a parameter in the macro signature and adds a corresponding + /// `TemplateExpansionStep` to the `TemplateBody`. + fn compile_variable_reference( + _context: EncodingContext, + signature: &MacroSignature, + definition: &mut TemplateBody, + annotations_range: Range, + variable: SymbolRef, + ) -> IonResult<()> { + let name = variable.text().ok_or_else(|| { + IonError::decoding_error("found variable whose name is unknown text ($0)") + })?; + if !annotations_range.is_empty() { + return IonResult::decoding_error(format!( + "found a variable reference '{name}' with annotations" + )); + } + let signature_index = signature + .parameters() + .iter() + .position(|p| p.name() == name) + .ok_or_else(|| { + IonError::decoding_error(format!("variable '{name}' is not recognized")) + })?; + definition.push_variable(signature_index); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use crate::lazy::expanded::compiler::TemplateCompiler; + use crate::lazy::expanded::macro_table::MacroTable; + use crate::lazy::expanded::template::{ + ExprRange, TemplateBodyMacroInvocation, TemplateBodyValueExpr, + TemplateBodyVariableReference, TemplateMacro, TemplateValue, + }; + use crate::lazy::expanded::EncodingContext; + use crate::{Int, IntoAnnotations, IonResult, Symbol, SymbolTable}; + + // This function only looks at the value portion of the TemplateElement. To compare annotations, + // see the `expect_annotations` method. + fn expect_value( + definition: &TemplateMacro, + index: usize, + expected: TemplateValue, + ) -> IonResult<()> { + let actual = definition + .body() + .expressions() + .get(index) + .expect("no such expansion step") + .expect_element() + .unwrap_or_else(|_| panic!("expected value {expected:?}")); + assert_eq!(actual.value(), &expected); + Ok(()) + } + + fn expect_macro( + definition: &TemplateMacro, + index: usize, + expected_address: usize, + expected_num_arguments: usize, + ) -> IonResult<()> { + expect_step( + definition, + index, + TemplateBodyValueExpr::MacroInvocation(TemplateBodyMacroInvocation::new( + expected_address, + // The arg range starts just after the macro invocation step and goes for `expected_num_arguments`. + ExprRange::new(index + 1..index + 1 + expected_num_arguments), + )), + ) + } + + fn expect_variable( + definition: &TemplateMacro, + index: usize, + expected_signature_index: usize, + ) -> IonResult<()> { + expect_step( + definition, + index, + TemplateBodyValueExpr::Variable(TemplateBodyVariableReference::new( + expected_signature_index, + )), + ) + } + + fn expect_step( + definition: &TemplateMacro, + index: usize, + expected: TemplateBodyValueExpr, + ) -> IonResult<()> { + let step = definition + .body() + .expressions() + .get(index) + .expect("no such expansion step"); + assert_eq!(step, &expected); + Ok(()) + } + + fn expect_annotations( + definition: &TemplateMacro, + index: usize, + expected: A, + ) { + let element = definition + .body + .expressions() + .get(index) + .expect("requested index does not exist") + .expect_element() + .unwrap(); + let actual_annotations = definition + .body + .annotations_storage() + .get(element.annotations_range().ops_range()) + .expect("invalid annotations range") + .into_annotations(); + let expected_annotations = expected.into_annotations(); + assert_eq!(actual_annotations, expected_annotations); + } + + struct TestResources { + macro_table: MacroTable, + symbol_table: SymbolTable, + allocator: bumpalo::Bump, + } + + impl TestResources { + fn new() -> Self { + Self { + macro_table: MacroTable::new(), + symbol_table: SymbolTable::new(), + allocator: bumpalo::Bump::new(), + } + } + + fn context(&self) -> EncodingContext { + EncodingContext { + macro_table: &self.macro_table, + symbol_table: &self.symbol_table, + allocator: &self.allocator, + } + } + } + + #[test] + fn single_scalar() -> IonResult<()> { + let resources = TestResources::new(); + let context = resources.context(); + + let expression = "(macro foo () 42)"; + + let template = TemplateCompiler::compile_from_text(context, expression)?; + assert_eq!(template.name(), "foo"); + assert_eq!(template.signature().parameters().len(), 0); + expect_value(&template, 0, TemplateValue::Int(42.into()))?; + Ok(()) + } + + #[test] + fn single_list() -> IonResult<()> { + let resources = TestResources::new(); + let context = resources.context(); + + let expression = "(macro foo () [1, 2, 3])"; + + let template = TemplateCompiler::compile_from_text(context, expression)?; + assert_eq!(template.name(), "foo"); + assert_eq!(template.signature().parameters().len(), 0); + expect_value(&template, 0, TemplateValue::List(ExprRange::new(1..4)))?; + expect_value(&template, 1, TemplateValue::Int(1.into()))?; + expect_value(&template, 2, TemplateValue::Int(2.into()))?; + expect_value(&template, 3, TemplateValue::Int(3.into()))?; + Ok(()) + } + + #[test] + fn multiple_scalar() -> IonResult<()> { + let resources = TestResources::new(); + let context = resources.context(); + + let expression = r#"(macro foo () (values 42 "hello" false))"#; + + let template = TemplateCompiler::compile_from_text(context, expression)?; + assert_eq!(template.name(), "foo"); + assert_eq!(template.signature().parameters().len(), 0); + expect_macro( + &template, + 0, + context.macro_table.address_for_name("values").unwrap(), + 3, + )?; + expect_value(&template, 1, TemplateValue::Int(42.into()))?; + expect_value(&template, 2, TemplateValue::String("hello".into()))?; + expect_value(&template, 3, TemplateValue::Bool(false))?; + Ok(()) + } + + #[test] + fn try_it() -> IonResult<()> { + let resources = TestResources::new(); + let context = resources.context(); + + let expression = "(macro foo (x y z) [100, [200, a::b::300], x, {y: [true, false, z]}])"; + + let template = TemplateCompiler::compile_from_text(context, expression)?; + expect_value(&template, 0, TemplateValue::List(ExprRange::new(1..12)))?; + expect_value(&template, 1, TemplateValue::Int(Int::from(100)))?; + expect_value(&template, 2, TemplateValue::List(ExprRange::new(3..5)))?; + expect_value(&template, 3, TemplateValue::Int(Int::from(200)))?; + expect_value(&template, 4, TemplateValue::Int(Int::from(300)))?; + expect_annotations(&template, 4, ["a", "b"]); + expect_variable(&template, 5, 0)?; + expect_value(&template, 6, TemplateValue::Struct(ExprRange::new(7..12)))?; + expect_value(&template, 7, TemplateValue::Symbol(Symbol::from("y")))?; + expect_value(&template, 8, TemplateValue::List(ExprRange::new(9..12)))?; + expect_value(&template, 9, TemplateValue::Bool(true))?; + expect_value(&template, 10, TemplateValue::Bool(false))?; + expect_variable(&template, 11, 2)?; + Ok(()) + } + + #[test] + fn identity_macro() -> IonResult<()> { + let resources = TestResources::new(); + let context = resources.context(); + + let expression = "(macro identity (x) x)"; + + let template = TemplateCompiler::compile_from_text(context, expression)?; + assert_eq!(template.name(), "identity"); + assert_eq!(template.signature().parameters().len(), 1); + expect_variable(&template, 0, 0)?; + Ok(()) + } + + #[test] + fn quote() -> IonResult<()> { + let resources = TestResources::new(); + let context = resources.context(); + + let expression = r#" + (macro foo (x) + // Outer 'values' call allows multiple expressions in the body + (values + // This `values` is a macro call that has a single argument: the variable `x` + (values x) + // This `quote` call causes the inner `(values x)` to be an uninterpreted s-expression. + (quote + (values x)))) + "#; + + let template = TemplateCompiler::compile_from_text(context, expression)?; + assert_eq!(template.name(), "foo"); + assert_eq!(template.signature().parameters().len(), 1); + // Outer `values` + expect_macro( + &template, + 0, + context.macro_table.address_for_name("values").unwrap(), + 5, + )?; + // First argument: `(values x)` + expect_macro( + &template, + 1, + context.macro_table.address_for_name("values").unwrap(), + 1, + )?; + expect_variable(&template, 2, 0)?; + // Second argument: `(quote (values x))` + // Notice that the `quote` is not part of the compiled output, only its arguments + expect_value(&template, 3, TemplateValue::SExp(ExprRange::new(4..6)))?; + expect_value(&template, 4, TemplateValue::Symbol("values".into()))?; + expect_value(&template, 5, TemplateValue::Symbol("x".into()))?; + + Ok(()) + } +} diff --git a/src/lazy/expanded/e_expression.rs b/src/lazy/expanded/e_expression.rs index b1d87a81..62d4053e 100644 --- a/src/lazy/expanded/e_expression.rs +++ b/src/lazy/expanded/e_expression.rs @@ -1,31 +1,99 @@ //! Types and traits representing an e-expression in an Ion stream. +#![allow(non_camel_case_types)] -use crate::lazy::decoder::{LazyDecoder, LazyRawValueExpr, RawValueExpr}; -use crate::lazy::expanded::macro_evaluator::{ArgumentKind, ToArgumentKind}; -use crate::lazy::expanded::{EncodingContext, ExpandedValueSource, LazyExpandedValue}; - -// When a `LazyRawValueExpr` appears in argument position within an e-expression, this trait -// implementation recognizes it as either a value or another macro invocation. -impl<'data, D: LazyDecoder<'data>> ToArgumentKind<'data, D, D::MacroInvocation> - for LazyRawValueExpr<'data, D> -{ - fn to_arg_expr<'top>( - self, +use crate::lazy::decoder::{LazyDecoder, LazyRawValueExpr}; +use crate::lazy::encoding::TextEncoding_1_1; +use crate::lazy::expanded::macro_evaluator::{MacroExpr, RawEExpression, ValueExpr}; +use crate::lazy::expanded::macro_table::MacroRef; +use crate::lazy::expanded::{EncodingContext, LazyExpandedValue}; +use crate::lazy::text::raw::v1_1::reader::MacroIdRef; +use crate::IonResult; +use std::fmt::{Debug, Formatter}; + +/// An e-expression (in Ion format `D`) that has been resolved in the current encoding context. +#[derive(Copy, Clone)] +pub struct EExpression<'top, D: LazyDecoder> { + pub(crate) context: EncodingContext<'top>, + pub(crate) raw_invocation: D::EExpression<'top>, + pub(crate) invoked_macro: MacroRef<'top>, +} + +impl<'top, D: LazyDecoder> EExpression<'top, D> { + pub fn raw_invocation(&self) -> D::EExpression<'top> { + self.raw_invocation + } + pub fn invoked_macro(&self) -> MacroRef<'top> { + self.invoked_macro + } +} + +impl<'top, D: LazyDecoder> Debug for EExpression<'top, D> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "EExpression {:?}", self.raw_invocation) + } +} + +impl<'top, D: LazyDecoder> EExpression<'top, D> { + pub fn new( context: EncodingContext<'top>, - ) -> ArgumentKind<'top, 'data, D, D::MacroInvocation> - where - Self: 'top, - { - match self { - // In this implementation, we're reading arguments to an E-expression in the data stream. - // Because e-expressions appear in the data stream (and not in a template), there is no - // environment of named variables. We do not attempt to resolve symbols as though they - // were variable names and instead pass them along as value literals. - RawValueExpr::ValueLiteral(value) => ArgumentKind::ValueLiteral(LazyExpandedValue { - context, - source: ExpandedValueSource::ValueLiteral(value), - }), - RawValueExpr::MacroInvocation(invocation) => ArgumentKind::MacroInvocation(invocation), + raw_invocation: D::EExpression<'top>, + invoked_macro: MacroRef<'top>, + ) -> Self { + Self { + context, + raw_invocation, + invoked_macro, } } } + +impl<'top, D: LazyDecoder> EExpression<'top, D> { + pub fn id(&self) -> MacroIdRef<'top> { + self.raw_invocation.id() + } + + pub fn arguments(&self) -> EExpressionArgsIterator<'top, D> { + EExpressionArgsIterator { + context: self.context, + raw_args: self.raw_invocation.raw_arguments(), + } + } +} + +impl<'top, D: LazyDecoder> From> for MacroExpr<'top, D> { + fn from(value: EExpression<'top, D>) -> Self { + MacroExpr::EExp(value) + } +} + +pub struct EExpressionArgsIterator<'top, D: LazyDecoder> { + context: EncodingContext<'top>, + raw_args: as RawEExpression<'top, D>>::RawArgumentsIterator<'top>, +} + +impl<'top, D: LazyDecoder> Iterator for EExpressionArgsIterator<'top, D> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + let raw_arg: LazyRawValueExpr<'top, D> = match self.raw_args.next()? { + Ok(arg) => arg, + Err(e) => return Some(Err(e)), + }; + + let expr = match raw_arg { + LazyRawValueExpr::::ValueLiteral(value) => { + ValueExpr::ValueLiteral(LazyExpandedValue::from_value(self.context, value)) + } + LazyRawValueExpr::::MacroInvocation(raw_invocation) => { + let invocation = match raw_invocation.resolve(self.context) { + Ok(invocation) => invocation, + Err(e) => return Some(Err(e)), + }; + ValueExpr::MacroInvocation(invocation.into()) + } + }; + Some(Ok(expr)) + } +} + +pub type TextEExpression_1_1<'top> = EExpression<'top, TextEncoding_1_1>; diff --git a/src/lazy/expanded/macro_evaluator.rs b/src/lazy/expanded/macro_evaluator.rs index 6157d618..43dd2b87 100644 --- a/src/lazy/expanded/macro_evaluator.rs +++ b/src/lazy/expanded/macro_evaluator.rs @@ -10,208 +10,329 @@ //! The evaluation logic is the same for macros in both contexts, though there are differences in //! encoding and argument handling that must be considered. For more information, see the //! documentation for the types below. +#![allow(non_camel_case_types)] use std::fmt::{Debug, Formatter}; -use std::marker::PhantomData; use bumpalo::collections::{String as BumpString, Vec as BumpVec}; -use crate::lazy::decoder::LazyDecoder; -use crate::lazy::expanded::macro_table::MacroKind; -use crate::lazy::expanded::stack::Stack; +use crate::lazy::decoder::{LazyDecoder, LazyRawValueExpr}; +use crate::lazy::expanded::e_expression::{EExpression, EExpressionArgsIterator}; +use crate::lazy::expanded::macro_table::{MacroKind, MacroRef}; +use crate::lazy::expanded::sequence::Environment; +use crate::lazy::expanded::template::{ + TemplateBodyValueExpr, TemplateBodyVariableReference, TemplateElement, TemplateMacroInvocation, + TemplateMacroInvocationArgsIterator, TemplateMacroRef, TemplateValue, +}; use crate::lazy::expanded::EncodingContext; use crate::lazy::expanded::{ExpandedValueRef, ExpandedValueSource, LazyExpandedValue}; use crate::lazy::str_ref::StrRef; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::result::IonFailure; -use crate::{IonError, IonResult, RawSymbolTokenRef, Sequence}; - -/// A syntactic entity that represents the invocation of a macro in some context. -/// -/// This entity may be an item from a binary stream, a text stream, or a template definition. -/// Implementors must specify how their type can be mapped to a macro ID and a sequence of arguments. -pub trait MacroInvocation<'data, D: LazyDecoder<'data>>: Copy + Clone + Debug { - /// A syntax-specific type that represents an argument in this macro invocation. - type ArgumentExpr: ToArgumentKind<'data, D, Self>; +use crate::{IonError, IonResult, RawSymbolTokenRef}; +/// The syntactic entity in format `D` that represents an e-expression. This expression has not +/// yet been resolved in the current encoding context. +pub trait RawEExpression<'top, D: LazyDecoder = Self>>: + Debug + Copy + Clone +{ /// An iterator that yields the macro invocation's arguments in order. - type ArgumentsIterator: Iterator>; + type RawArgumentsIterator<'a>: Iterator>> + where + Self: 'a; /// The macro name or address specified at the head of this macro invocation. - fn id(&self) -> MacroIdRef; + fn id(&self) -> MacroIdRef<'top>; /// The arguments that follow the macro name or address in this macro invocation. - fn arguments(&self) -> Self::ArgumentsIterator; + fn raw_arguments(&self) -> Self::RawArgumentsIterator<'top>; + + /// Looks up the macro invoked by this E-expression in the given `EncodingContext`. + /// If the lookup is successful, returns an `Ok` containing a resolved `EExpression` that holds + /// a reference to the macro being invoked. + /// If the ID cannot be found in the `EncodingContext`, returns `Err`. + fn resolve(self, context: EncodingContext<'top>) -> IonResult> { + let invoked_macro = context + .macro_table + .macro_with_id(self.id()) + .ok_or_else(|| { + IonError::decoding_error(format!("unrecognized macro ID {:?}", self.id())) + })?; + Ok(EExpression::new(context, self, invoked_macro)) + } +} + +/// An invocation of a macro found in either the data stream or in the body of a template. +/// This invocation has been resolved in the current encoding context, and holds a reference to +/// the definition of the macro being invoked. +#[derive(Copy, Clone, Debug)] +pub enum MacroExpr<'top, D: LazyDecoder> { + /// A macro invocation found in the body of a template. + TemplateMacro(TemplateMacroInvocation<'top>), + /// A macro invocation found in the data stream. + EExp(EExpression<'top, D>), +} + +impl<'top, D: LazyDecoder> MacroExpr<'top, D> { + fn id(&self) -> MacroIdRef { + match &self { + MacroExpr::TemplateMacro(m) => m.id(), + MacroExpr::EExp(e) => e.id(), + } + } + + fn arguments(&self, environment: Environment<'top, D>) -> MacroExprArgsIterator<'top, D> { + let args_kind = match &self { + MacroExpr::TemplateMacro(m) => { + MacroExprArgsKind::<'top, D>::Macro(m.arguments(environment)) + } + MacroExpr::EExp(e) => MacroExprArgsKind::<'top, D>::EExp(e.arguments()), + }; + MacroExprArgsIterator { source: args_kind } + } + + fn invoked_macro(&self) -> MacroRef<'top> { + match &self { + MacroExpr::TemplateMacro(m) => m.invoked_macro(), + MacroExpr::EExp(e) => e.invoked_macro(), + } + } +} + +pub enum MacroExprArgsKind<'top, D: LazyDecoder> { + Macro(TemplateMacroInvocationArgsIterator<'top, D>), + EExp(EExpressionArgsIterator<'top, D>), +} + +pub struct MacroExprArgsIterator<'top, D: LazyDecoder> { + source: MacroExprArgsKind<'top, D>, +} + +impl<'top, D: LazyDecoder> Iterator for MacroExprArgsIterator<'top, D> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + match &mut self.source { + MacroExprArgsKind::Macro(m) => m.next(), + MacroExprArgsKind::EExp(e) => e.next(), + } + } } /// A single expression appearing in argument position within a macro invocation. -pub enum ArgumentKind<'top, 'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> { +#[derive(Debug, Copy, Clone)] +pub enum ArgExpr<'top, D: LazyDecoder> { /// An Ion value that requires no further evaluation. - ValueLiteral(LazyExpandedValue<'top, 'data, D>), + // `LazyExpandedValue` can be backed by either a stream value or a template value, so it covers + // both contexts. + ValueLiteral(LazyExpandedValue<'top, D>), /// A variable name that requires expansion. - Variable(RawSymbolTokenRef<'top>), + // Variable references can only appear in template macro invocations. + Variable(TemplateBodyVariableReference), /// A macro invocation that requires evaluation. - MacroInvocation(M), + MacroInvocation(MacroExpr<'top, D>), } -/// Converts a syntactic element appearing in argument position into an [`ArgumentKind`] using the -/// provided [`EncodingContext`]. -pub trait ToArgumentKind<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> { - fn to_arg_expr<'top>(self, context: EncodingContext<'top>) -> ArgumentKind<'top, 'data, D, M> - where - Self: 'top; +impl<'top, D: LazyDecoder> ArgExpr<'top, D> { + /// If this `ArgExpr` is a variable reference, resolves it to an expression from its originating + /// environment. Returns an `ArgValueExpr` which is the value literal or macro invocation to + /// which the variable referred. + /// Otherwise, passes through the value literal or macro invocation. + pub(crate) fn resolve( + &self, + environment: Environment<'top, D>, + ) -> IonResult> { + match self { + ArgExpr::ValueLiteral(value) => Ok(ValueExpr::ValueLiteral(*value)), + ArgExpr::Variable(variable) => environment + .get_expected(variable.signature_index()) + .copied(), + ArgExpr::MacroInvocation(invocation) => Ok(ValueExpr::MacroInvocation(*invocation)), + } + } +} + +/// A value expression (i.e. value literal or macro invocation) found in any context. +/// +/// A `ValueExpr` is a resolved value. It cannot be a variable reference. If it is a macro +/// invocation, it holds a reference to the definition of the macro it invokes. +#[derive(Debug, Copy, Clone)] +pub enum ValueExpr<'top, D: LazyDecoder> { + /// An Ion value that requires no further evaluation. + // `LazyExpandedValue` can be backed by either a stream value or a template value, so it covers + // both contexts. + ValueLiteral(LazyExpandedValue<'top, D>), + /// A macro invocation that requires evaluation. + MacroInvocation(MacroExpr<'top, D>), } /// Indicates which of the supported macros this represents and stores the state necessary to /// continue evaluating that macro. -pub enum MacroExpansionKind<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> { +pub enum MacroExpansionKind<'top, D: LazyDecoder> { Void, - Values(ValuesExpansion<'data, D, M>), - MakeString(MakeStringExpansion<'data, D, M>), - // TODO: The others, including template macros. - // TODO: Treat variables as a special kind of macro invocation, similar to `values` but without - // an accessible entry in the macro table. + Values(ValuesExpansion<'top, D>), + MakeString(MakeStringExpansion<'top, D>), + Template(TemplateExpansion<'top>), } /// A macro in the process of being evaluated. Stores both the state of the evaluation and the /// syntactic element that represented the macro invocation. -pub struct MacroExpansion<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> { - kind: MacroExpansionKind<'data, D, M>, - invocation: M, +pub struct MacroExpansion<'top, D: LazyDecoder> { + kind: MacroExpansionKind<'top, D>, + invocation: MacroExpr<'top, D>, } -impl<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> Debug - for MacroExpansion<'data, D, M> -{ +impl<'top, D: LazyDecoder> MacroExpansion<'top, D> { + pub(crate) fn new(kind: MacroExpansionKind<'top, D>, invocation: MacroExpr<'top, D>) -> Self { + Self { kind, invocation } + } +} + +impl<'top, D: LazyDecoder> Debug for MacroExpansion<'top, D> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "", self.invocation) + let name = match &self.kind { + MacroExpansionKind::Void => "void", + MacroExpansionKind::Values(_) => "values", + MacroExpansionKind::MakeString(_) => "make_string", + MacroExpansionKind::Template(t) => { + return write!(f, "", t.template.name()) + } + }; + write!(f, "") } } -impl<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> MacroExpansion<'data, D, M> { +impl<'top, D: LazyDecoder> MacroExpansion<'top, D> { /// Continues evaluating this macro until it: /// * produces another value. /// * encounters another macro or variable that needs to be expanded. /// * is completed. - fn next<'top>( + fn next( &mut self, context: EncodingContext<'top>, - ) -> IonResult> - where - 'data: 'top, - M: 'top, - { + environment: Environment<'top, D>, + ) -> IonResult>> { use MacroExpansionKind::*; // Delegate the call to `next()` based on the macro kind. match &mut self.kind { - MakeString(make_string_expansion) => make_string_expansion.next(context), - Values(values_expansion) => values_expansion.next(context), + MakeString(make_string_expansion) => make_string_expansion.next(context, environment), + Values(values_expansion) => values_expansion.next(context, environment), // `void` is trivial and requires no delegation - Void => Ok(MacroExpansionStep::Complete), + Void => Ok(None), + Template(template_expansion) => template_expansion.next(context, environment), } } } -/// Represents a single step in the process of evaluating a macro. -pub enum MacroExpansionStep<'top, 'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> { - /// The next value produced by continuing the macro evaluation. - ExpandedValue(LazyExpandedValue<'top, 'data, D>), - /// Another macro that will need to be evaluated before an expanded value can be returned. - AnotherMacroToEvaluate(M), - /// This macro will not produce any further values. - Complete, -} +pub type MacroStack<'top, D> = BumpVec<'top, MacroExpansion<'top, D>>; +pub type EnvironmentStack<'top, D> = BumpVec<'top, Environment<'top, D>>; /// Evaluates macro invocations recursively, yielding a single expanded value at a time. /// -/// This evaluator can be used in a variety of contexts. It supports the cross product of three -/// use case dimensions: +/// The evaluator supports the cross product of three use case dimensions: /// /// {e-expression, template macro invocation} /// x {text, binary} /// x {eager, lazy} -pub struct MacroEvaluator< - 'data, - // The Ion format of the data stream - D: LazyDecoder<'data>, - // The syntactic element representing the e-expression or template macro invocation - M: MacroInvocation<'data, D>, - // The storage being used to store the macro expansion stack. (Either a long-lived `Vec` or - // a bumpalo [`Bump`](BumpVec) whose contents only live as long as the reader is parked on - // the same top-level value). - S: Stack>, -> { - // A stack of all of the macro invocations currently being evaluated. - macro_stack: S, - spooky: PhantomData<(&'data D, M)>, +/// +/// For incremental/lazy evaluation, push a macro invocation onto the stack using +/// [`MacroEvaluator::push`] and then use [`MacroEvaluator::next`] to evaluate the next value. +/// +/// For eager evaluation, use [`MacroEvaluator::evaluate`], which returns an iterator that will +/// yield the expanded values. +pub struct MacroEvaluator<'top, D: LazyDecoder> { + // Holds references to the macro table, symbol table, and bump allocator. + context: EncodingContext<'top>, + // A stack with the most recent macro invocations at the top. This stack grows each time a macro + // of any kind begins evaluation. + macro_stack: MacroStack<'top, D>, + // A stack of _template_ macro invocation environments. This stack only grows when a template + // macro is invoked from any context. For example, given these template definitions: + // (macro foo (x) (values 1 2 x)) + // (macro bar (y) (foo y)) + // and this invocation: + // (:bar 3) + // A new environment [/*y=*/ 3] would be pushed for the invocation of `bar`, and another + // environment [/*x=y=*/ 3] would be pushed for the invocation of `foo` within `bar`. However, + // no environment would be created/pushed for the invocation of the `values` macro within `foo`. + // For any macro being evaluated, the current environment is always the one at the top of the + // environment stack. + env_stack: EnvironmentStack<'top, D>, } -impl< - 'data, - D: LazyDecoder<'data>, - M: MacroInvocation<'data, D>, - S: Stack>, - > MacroEvaluator<'data, D, M, S> -{ - /// Constructs a `MacroEvaluator` that uses storage with a static lifetime. - pub fn new() -> MacroEvaluator<'data, D, M, Vec>> { - MacroEvaluator { - macro_stack: Vec::new(), - spooky: PhantomData, +impl<'top, D: LazyDecoder> MacroEvaluator<'top, D> { + pub fn new(context: EncodingContext<'top>, environment: Environment<'top, D>) -> Self { + let macro_stack = BumpVec::new_in(context.allocator); + let mut env_stack = BumpVec::new_in(context.allocator); + env_stack.push(environment); + Self { + macro_stack, + env_stack, + context, } } +} + +impl<'top, D: LazyDecoder> MacroEvaluator<'top, D> { + /// Returns the number of macros that are currently being evaluated. + pub fn macro_stack_depth(&self) -> usize { + self.macro_stack.len() + } + + /// Returns the current environment (i.e. the one at the top of the macro stack.) + pub fn environment(&self) -> Environment<'top, D> { + // The stack is never completely empty; the 'root' evaluator is created with an empty + // environment at the base of the stack. + *self.env_stack.last().unwrap() + } - /// Constructs a `MacroEvaluator` with a lifetime tied to the current [`EncodingContext`]. - pub fn new_transient<'top>( + /// Creates a new `Environment` for the given `invocation`. + /// + /// This helper function iterates over the argument expressions in the invocation. If an argument + /// expression is a value literal or macro invocation, it is added to the new environment as-is. + /// If an argument is a variable reference, it is substituted with the corresponding value literal + /// or macro invocation from the current environment and then added to the new environment. + fn make_new_evaluation_environment( + &mut self, context: EncodingContext<'top>, - ) -> MacroEvaluator<'data, D, M, BumpVec<'top, MacroExpansion<'data, D, M>>> { - MacroEvaluator { - macro_stack: BumpVec::new_in(context.allocator), - spooky: PhantomData, + invocation: MacroExpr<'top, D>, + ) -> IonResult> { + let mut args = BumpVec::new_in(context.allocator); + for arg in invocation.arguments(self.environment()) { + args.push(arg?); } + let environment = Environment::new(args); + Ok(environment) } - /// Finds the macro corresponding to the ID in the invocation in the specified encoding context. - /// Returns an error if the macro cannot be found. Otherwise, returns a [`MacroExpansion`] - /// containing the original invocation and the initialized state needed to evaluate it. - fn resolve_invocation<'top>( + /// Initializes a [`MacroExpansion`] that contains the necessary state to incrementally evaluate + /// the provided macro invocation. + /// + /// Returns an error if the invocation is invalid due to missing or malformed arguments. + fn initialize_expansion( + &mut self, context: EncodingContext<'top>, - invocation_to_evaluate: M, - initial_eval_stack_depth: usize, - ) -> IonResult> { - // Get the `MacroKind` corresponding to the given ID. It contains either a name (`&str`) or - // an address (`usize`). - let macro_kind = match invocation_to_evaluate.id() { - MacroIdRef::LocalName(name) => { - context.macro_table.macro_with_name(name).ok_or_else(|| { - IonError::decoding_error(format!( - "unrecognized macro name '{name}' in {:?}", - invocation_to_evaluate - )) - }) - } - MacroIdRef::LocalAddress(address) => context - .macro_table - .macro_at_address(address) - .ok_or_else(|| { - IonError::decoding_error(format!( - "invalid macro address '{address}' in {:?}", - invocation_to_evaluate - )) - }), - }?; - + invocation_to_evaluate: MacroExpr<'top, D>, + ) -> IonResult> { // Initialize a `MacroExpansionKind` with the state necessary to evaluate the requested // macro. - let expansion_kind = match macro_kind { + let expansion_kind = match invocation_to_evaluate.invoked_macro().kind() { MacroKind::Void => MacroExpansionKind::Void, MacroKind::Values => MacroExpansionKind::Values(ValuesExpansion { - arguments: invocation_to_evaluate.arguments(), - initial_eval_stack_depth, + arguments: invocation_to_evaluate.arguments(self.environment()), + initial_eval_stack_depth: self.macro_stack_depth(), }), MacroKind::MakeString => MacroExpansionKind::MakeString(MakeStringExpansion::new( - invocation_to_evaluate.arguments(), + invocation_to_evaluate.arguments(self.environment()), )), + MacroKind::Template(template) => { + let template_address = invocation_to_evaluate.invoked_macro().address(); + let template_ref = TemplateMacroRef::new(template_address, template); + let new_environment = + self.make_new_evaluation_environment(context, invocation_to_evaluate)?; + self.env_stack.push(new_environment); + MacroExpansionKind::Template(TemplateExpansion::new(template_ref)) + } }; Ok(MacroExpansion { kind: expansion_kind, @@ -221,15 +342,28 @@ impl< /// Given a syntactic element representing a macro invocation, attempt to resolve it with the /// current encoding context and push the resulting `MacroExpansion` onto the stack. - pub fn push(&mut self, context: EncodingContext, invocation: M) -> IonResult<()> { - let expansion = Self::resolve_invocation(context, invocation, self.stack_depth() + 1)?; + pub fn push( + &mut self, + context: EncodingContext<'top>, + invocation: impl Into>, + ) -> IonResult<()> { + let macro_expr = invocation.into(); + let expansion = self.initialize_expansion(context, macro_expr)?; self.macro_stack.push(expansion); Ok(()) } - /// The number of macros in the process of being evaluated. - pub fn stack_depth(&self) -> usize { - self.macro_stack.len() + /// Continues evaluating the macro at the top of the stack until either: + /// * a value is yielded + /// * the macro stack is empty (that is: all macro evaluations are complete) + /// + /// This is equivalent to calling [`next_at_or_above_depth`](Self::next_at_or_above_depth) + /// with a `depth_to_exhaust` of `0`; see that method's documentation for more details. + pub fn next( + &mut self, + context: EncodingContext<'top>, + ) -> IonResult>> { + self.next_at_or_above_depth(context, 0) } /// Continues evaluating the macro at the top of the stack until either: @@ -246,23 +380,21 @@ impl< /// is `0`, `next()` will return `None` when all macros on the stack are exhausted. /// /// The caller must verify that the stack's depth is greater than or equal to `depth_to_exhaust` - /// before calling `next()`. - pub fn next<'top>( + /// before calling `next_at_or_above_depth()`. + pub fn next_at_or_above_depth( &mut self, context: EncodingContext<'top>, depth_to_exhaust: usize, - ) -> IonResult>> - where - 'data: 'top, - M: 'top, - { + ) -> IonResult>> { debug_assert!( - self.stack_depth() >= depth_to_exhaust, + self.macro_stack_depth() >= depth_to_exhaust, "asked to exhaust a macro at an invalid depth" ); + loop { + let environment = self.environment(); // Get the expansion at the top of the stack. - let current_expansion = match self.macro_stack.peek_mut() { + let current_expansion = match self.macro_stack.last_mut() { // NOTE: If the user specifies a `depth_to_exhaust` of 0, this is where the loop // will end. Behaviorally, this is identical to a `depth_to_exhaust` of 1, // which would return `Ok(None)` at the bottom of this method. It is always @@ -273,22 +405,34 @@ impl< }; // Ask that expansion to continue its evaluation by one step. - use MacroExpansionStep::*; - match current_expansion.next(context)? { + use ValueExpr::*; + match current_expansion.next(context, environment)? { // If we get a value, return it to the caller. - ExpandedValue(value) => return Ok(Some(value)), + Some(ValueLiteral(value)) => { + return Ok(Some(value)); + } // If we get another macro, push it onto the stack and continue evaluation. - AnotherMacroToEvaluate(invocation) => { + Some(MacroInvocation(invocation)) => { // If we encounter another macro invocation, put it on top of the stack. self.push(context, invocation)?; continue; } // If the current macro reports that its expansion is complete... - Complete => { - // ...pop it off the stack... - let _popped = self.macro_stack.pop().unwrap(); + None => { + // Check to see if the completed value was a template. If so, discard its environment. + let completed_kind = &self.macro_stack.last().unwrap().kind; + if matches!(completed_kind, MacroExpansionKind::Template(_)) { + // NB: Here and below, we use `truncate()` instead of `pop()` so the value can + // be dropped in place without incurring a move. That move runs afoul of the + // aliasing requirements that `miri` looks for, though I'm unsure why. + // Once Polonius lands and we are able to remove the `unsafe` usages in + // the LazyExpandingReader, this will be unnecessary. + self.env_stack.truncate(self.env_stack.len() - 1); + } + self.macro_stack.truncate(self.macro_stack.len() - 1); + // ...and see that was the macro the caller was interested in evaluating. - if self.stack_depth() < depth_to_exhaust { + if self.macro_stack.len() < depth_to_exhaust { // If so, there are no more values to yield, even though there may still // be macros on the stack. return Ok(None); @@ -303,97 +447,33 @@ impl< /// Attempts to resolve the provided `invocation` in the specified `context`. Upon success, /// returns an iterator that lazily computes the expansion of the macro invocation and yields /// its values. - pub(crate) fn evaluate<'iter, 'top>( + pub fn evaluate<'iter>( &'iter mut self, context: EncodingContext<'top>, - invocation: M, - ) -> IonResult> { - self.push(context, invocation)?; + invocation: impl Into>, + ) -> IonResult> + where + Self: Sized, + { + self.push(context, invocation.into())?; Ok(EvaluatingIterator::new(self, context)) } } -// ===== Type aliases for commonly used flavors of `MacroEvaluator` ===== - -/// A [`MacroEvaluator`] for expanding e-expressions found in the data stream of the format `D`. -pub type EExpEvaluator<'data, D> = MacroEvaluator< - 'data, - D, - >::MacroInvocation, - // A Vec with a static lifetime allows this to carry state over between top-level values. - Vec>::MacroInvocation>>, ->; - -/// Like [`EExpEvaluator`], but can only be used for the duration of the lifetime `'top`. This is -/// used when a macro expansion needs to perform expansions of its own without yielding flow control -/// to the primary evaluator. -/// -/// For example, the `(:make_string ...)` macro needs to evaluate each of its arguments to produce -/// a series of text values that it can concatenate. Those arguments may themselves be macro -/// invocations. However, we need to eagerly evaluate them to return `:make_string`'s only output -/// value: -/// -/// ```ion_1_1 -/// (:make_string -/// (:values a b c) // Macro invocation argument -/// (:make_string d e) // Macro invocation argument -/// f) // => "abcdef" -/// ``` -/// -/// The MacroExpansion holding `:make_string`'s mutable state lives in the stack of the primary -/// evaluator, making it (practically) impossible to modify the stack by pushing another -/// MacroExpansion onto it. Instead, it creates an evaluator of its own using short-lived, -/// bump-allocated storage and fully evaluates each argument. -pub type TransientEExpEvaluator<'top, 'data, D> = MacroEvaluator< - 'data, - D, - >::MacroInvocation, - // A BumpVec allows us to very cheaply store state knowing that it must be discarded when the - // reader advances to the next top-level value. - BumpVec<'top, MacroExpansion<'data, D, >::MacroInvocation>>, ->; - -/// A [`MacroEvaluator`] for expanding macro invocations found in a template body, all in the context -/// of a data stream in the format `D`. -pub type TdlMacroEvaluator<'top, 'data, D> = - MacroEvaluator<'data, D, &'top Sequence, Vec>>; - -pub type TransientTdlMacroEvaluator<'top, 'data, D> = MacroEvaluator< - 'data, - D, - &'top Sequence, - BumpVec<'top, MacroExpansion<'data, D, &'top Sequence>>, ->; - /// Yields the values produced by incrementally evaluating the macro that was at the top of the /// evaluator's stack when the iterator was created. -pub struct EvaluatingIterator< - 'iter, - 'top: 'iter, - 'data: 'top, - D: LazyDecoder<'data>, - M: MacroInvocation<'data, D>, - S: Stack>, -> { - evaluator: &'iter mut MacroEvaluator<'data, D, M, S>, +pub struct EvaluatingIterator<'iter, 'top, D: LazyDecoder> { + evaluator: &'iter mut MacroEvaluator<'top, D>, context: EncodingContext<'top>, initial_stack_depth: usize, } -impl< - 'iter, - 'top, - 'data: 'top, - D: LazyDecoder<'data>, - M: MacroInvocation<'data, D>, - S: Stack>, - > EvaluatingIterator<'iter, 'top, 'data, D, M, S> -{ +impl<'iter, 'top, D: LazyDecoder> EvaluatingIterator<'iter, 'top, D> { pub fn new( - evaluator: &'iter mut MacroEvaluator<'data, D, M, S>, + evaluator: &'iter mut MacroEvaluator<'top, D>, context: EncodingContext<'top>, ) -> Self { - let initial_stack_depth = evaluator.stack_depth(); + let initial_stack_depth = evaluator.macro_stack_depth(); Self { evaluator, context, @@ -402,20 +482,12 @@ impl< } } -impl< - 'iter, - 'top, - 'data: 'top, - D: LazyDecoder<'data>, - M: MacroInvocation<'data, D> + 'top, - S: Stack>, - > Iterator for EvaluatingIterator<'iter, 'top, 'data, D, M, S> -{ - type Item = IonResult>; +impl<'iter, 'top, D: LazyDecoder> Iterator for EvaluatingIterator<'iter, 'top, D> { + type Item = IonResult>; fn next(&mut self) -> Option { self.evaluator - .next(self.context, self.initial_stack_depth) + .next_at_or_above_depth(self.context, self.initial_stack_depth) .transpose() } } @@ -432,47 +504,30 @@ impl< /// (:values 1) => 1 /// (:values 1 2 3) => 1 2 3 /// (:values 1 2 (:values 3 4)) => 1 2 3 4 -pub struct ValuesExpansion<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> { +pub struct ValuesExpansion<'top, D: LazyDecoder> { // Which argument the macro is in the process of expanding - arguments: M::ArgumentsIterator, + arguments: MacroExprArgsIterator<'top, D>, // The stack depth where this `values` call lives. When the stack shrinks below this depth, // evaluation is complete. initial_eval_stack_depth: usize, } -impl<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> ValuesExpansion<'data, D, M> { - pub fn new(arguments: M::ArgumentsIterator, initial_eval_stack_depth: usize) -> Self { +impl<'top, D: LazyDecoder> ValuesExpansion<'top, D> { + pub fn new(arguments: MacroExprArgsIterator<'top, D>, initial_eval_stack_depth: usize) -> Self { Self { arguments, initial_eval_stack_depth, } } - /// Yields the next [`MacroExpansionStep`] in this macro's evaluation. - pub fn next<'top>( + /// Yields the next [`ValueExpr`] in this macro's evaluation. + pub fn next( &mut self, - context: EncodingContext<'top>, - ) -> IonResult> - where - 'data: 'top, - M: 'top, - { + _context: EncodingContext<'top>, + _environment: Environment<'top, D>, + ) -> IonResult>> { // We visit the argument expressions in the invocation in order from left to right. - let arg_expr = match self.arguments.next() { - Some(Err(e)) => return Err(e), - Some(Ok(arg)) => arg.to_arg_expr(context), - None => return Ok(MacroExpansionStep::Complete), - }; - - match arg_expr { - // If the argument is a value, return it. - ArgumentKind::ValueLiteral(value) => Ok(MacroExpansionStep::ExpandedValue(value)), - ArgumentKind::Variable(_variable) => todo!("variable expansion"), - // If the argument is a macro invocation, yield it that so the evaluator can push it onto the stack. - ArgumentKind::MacroInvocation(invocation) => { - Ok(MacroExpansionStep::AnotherMacroToEvaluate(invocation)) - } - } + self.arguments.next().transpose() } } @@ -496,34 +551,29 @@ impl<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> ValuesExpansion /// (:make_string (:values "first" "_") $4) => "first_name" /// (:make_string) => "" /// (:make_string "foo" 7) => Error -pub struct MakeStringExpansion<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> { - arguments: M::ArgumentsIterator, +pub struct MakeStringExpansion<'top, D: LazyDecoder> { + arguments: MacroExprArgsIterator<'top, D>, is_complete: bool, - spooky: PhantomData, } -impl<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> MakeStringExpansion<'data, D, M> { - pub fn new(arguments: M::ArgumentsIterator) -> Self { +impl<'top, D: LazyDecoder> MakeStringExpansion<'top, D> { + pub fn new(arguments: MacroExprArgsIterator<'top, D>) -> Self { Self { arguments, is_complete: false, - spooky: Default::default(), } } - /// Yields the next [`MacroExpansionStep`] in this macro's evaluation. - pub fn next<'top>( + /// Yields the next [`ValueExpr`] in this `make_string` macro's evaluation. + pub fn next( &mut self, context: EncodingContext<'top>, - ) -> IonResult> - where - 'data: 'top, - M: 'top, - { + environment: Environment<'top, D>, + ) -> IonResult>> { // `make_string` always produces a single value. Once that value has been returned, it needs // to report `Complete` on the following call to `next()`. if self.is_complete { - return Ok(MacroExpansionStep::Complete); + return Ok(None); } // Create a bump-allocated buffer to hold our constructed string @@ -534,19 +584,15 @@ impl<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> MakeStringExpan // inside the evaluator, we cannot get a simultaneous mutable reference to the evaluator // itself. Instead, we use the bump allocator the make a transient macro evaluator // whose resources can be trivially reclaimed when the expansion is done. - let mut evaluator = - MacroEvaluator::<'data, D, M, BumpVec<'top, MacroExpansion>>::new_transient( - context, - ); + let mut evaluator = MacroEvaluator::new(context, environment); - for arg in self.arguments.by_ref() { - let arg_expr: ArgumentKind = arg?.to_arg_expr(context); + for arg_result in &mut self.arguments { + let arg_expr = arg_result?; match arg_expr { - ArgumentKind::ValueLiteral(value) => { + ValueExpr::ValueLiteral(value) => { Self::append_expanded_raw_text_value(context, &mut buffer, value.read()?)? } - ArgumentKind::Variable(_variable) => todo!("variable expansion"), - ArgumentKind::MacroInvocation(invocation) => { + ValueExpr::MacroInvocation(invocation) => { for value_result in evaluator.evaluate(context, invocation)? { let value = value_result?; let expanded = value.read()?; @@ -556,25 +602,25 @@ impl<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> MakeStringExpan } } - let empty_annotations = BumpVec::new_in(context.allocator); - // Convert our BumpString<'bump> into a &'bump str that we can wrap in an `ExpandedValueRef` let constructed_text = buffer.into_bump_str(); - let expanded_value_ref = ExpandedValueRef::String(StrRef::from(constructed_text)); + let expanded_value_ref: &'top ExpandedValueRef<'top, D> = context + .allocator + .alloc_with(|| ExpandedValueRef::String(StrRef::from(constructed_text))); + static EMPTY_ANNOTATIONS: &[&str] = &[]; self.is_complete = true; - - Ok(MacroExpansionStep::ExpandedValue(LazyExpandedValue { + Ok(Some(ValueExpr::ValueLiteral(LazyExpandedValue { context, - source: ExpandedValueSource::Constructed((empty_annotations, expanded_value_ref)), - })) + source: ExpandedValueSource::Constructed(EMPTY_ANNOTATIONS, expanded_value_ref), + }))) } /// Appends a string fragment to the `BumpString` being constructed. fn append_expanded_raw_text_value( context: EncodingContext<'_>, buffer: &mut BumpString, - value: ExpandedValueRef<'_, 'data, D>, + value: ExpandedValueRef<'_, D>, ) -> IonResult<()> { match value { ExpandedValueRef::String(text) => buffer.push_str(text.as_ref()), @@ -606,17 +652,68 @@ impl<'data, D: LazyDecoder<'data>, M: MacroInvocation<'data, D>> MakeStringExpan } } +// ===== Implementation of template macro expansion ===== + +/// The evaluation state of a template expansion. +#[derive(Clone, Debug)] +pub struct TemplateExpansion<'top> { + // A reference to the template definition + template: TemplateMacroRef<'top>, + // The current 'step' of the expansion being processed. + step_index: usize, +} + +impl<'top> TemplateExpansion<'top> { + pub fn new(template: TemplateMacroRef<'top>) -> Self { + Self { + template, + step_index: 0, + } + } + + fn next<'data: 'top, D: LazyDecoder>( + &mut self, + context: EncodingContext<'top>, + environment: Environment<'top, D>, + ) -> IonResult>> { + let value_expr = match self.template.body().expressions().get(self.step_index) { + None => return Ok(None), + Some(expr) => expr, + }; + self.step_index += 1; + + let step = match value_expr { + TemplateBodyValueExpr::Element(e) => { + match e.value() { + TemplateValue::List(range) + | TemplateValue::SExp(range) + | TemplateValue::Struct(range) => self.step_index += range.len(), + _ => {} + } + ValueExpr::ValueLiteral(LazyExpandedValue::from_template( + context, + environment, + TemplateElement::new(self.template, e), + )) + } + TemplateBodyValueExpr::Variable(variable) => { + *environment.get_expected(variable.signature_index())? + } + TemplateBodyValueExpr::MacroInvocation(raw_invocation) => { + let invocation = raw_invocation.resolve(self.template, context); + self.step_index += invocation.arg_expressions().len(); + ValueExpr::MacroInvocation(invocation.into()) + } + }; + + Ok(Some(step)) + } +} + #[cfg(test)] mod tests { - use bumpalo::Bump as BumpAllocator; - - use crate::lazy::encoding::TextEncoding_1_1; - use crate::lazy::expanded::macro_evaluator::TdlMacroEvaluator; - use crate::lazy::expanded::macro_table::MacroTable; - use crate::lazy::expanded::EncodingContext; use crate::lazy::reader::LazyTextReader_1_1; - use crate::lazy::value::LazyValue; - use crate::{Element, ElementReader, IonResult, SymbolTable}; + use crate::{ElementReader, IonResult}; /// Reads `input` and `expected` using an expanding reader and asserts that their output /// is the same. @@ -642,31 +739,274 @@ mod tests { /// /// This test exists to demonstrate that macro evaluation within the TDL context works the /// same as evaluation in the data stream. - fn eval_tdl_template_invocation(invocation: &str, expected: &str) -> IonResult<()> { - let macro_table = MacroTable::new(); - let symbol_table = SymbolTable::new(); - let allocator = BumpAllocator::new(); - let context = EncodingContext::new(¯o_table, &symbol_table, &allocator); - let mut evaluator = TdlMacroEvaluator::::new(); - let invocation = Element::read_one(invocation)?; - let actuals = evaluator.evaluate(context, invocation.expect_sexp()?)?; + fn eval_template_invocation( + template_definition: &str, + invocation: &str, + expected: &str, + ) -> IonResult<()> { + let mut reader = LazyTextReader_1_1::new(invocation.as_bytes())?; + let _macro_address = reader.register_template(template_definition)?; + let actual = reader.read_all_elements()?; let mut expected_reader = LazyTextReader_1_1::new(expected.as_bytes())?; - for actual_result in actuals { - // Read the next expected value as a raw value, then wrap it in an `ExpandedRawValueRef` - // so it can be directly compared to the actual. - let expected: Element = expected_reader.next()?.unwrap().read()?.try_into()?; - let actual: Element = LazyValue::from(actual_result?).try_into()?; - assert_eq!(actual, expected); - } + let expected = expected_reader.read_all_elements()?; + assert_eq!(actual, expected); assert!(matches!(expected_reader.next(), Ok(None))); Ok(()) } + #[test] + fn multiple_top_level_values() -> IonResult<()> { + eval_template_invocation( + "(macro foo () (values 1 2 3 4 5))", + r#" + (:foo) + "#, + r#" + 1 2 3 4 5 + "#, + ) + } + + #[test] + fn it_takes_all_kinds() -> IonResult<()> { + eval_template_invocation( + r#"(macro foo () + (values + null + true + 1 + 1e0 + 1.0 + 2023T + "1" + (quote '1') // TODO: Only treat identifiers as variables + {{MQ==}} + {{"1"}} + [1] + (quote (1)) // Prevent the sexp from being considered a macro invocation + {'1':1}))"#, + r#" + (:foo) + "#, + r#" + null + true + 1 + 1e0 + 1.0 + 2023T + "1" + '1' + {{MQ==}} + {{"1"}} + [1] + (1) + {'1':1} + "#, + ) + } + + #[test] + fn emit_symbol_table() -> IonResult<()> { + eval_template_invocation( + r#" + (macro lst (symbols) + $ion_symbol_table::{ + symbols: symbols + } + ) + "#, + r#" + (:lst ["foo", "bar", "baz"]) $10 $11 $12 + "#, + r#" + foo bar baz + "#, + ) + } + + #[test] + fn context_changes_happen_between_top_level_expressions() -> IonResult<()> { + eval_template_invocation( + r#" + (macro lst (symbols) + (values + $ion_symbol_table::{ + symbols: symbols + } + ) + ) + "#, + r#" + $ion_symbol_table::{ + symbols: ["foo", "bar"] + } + + // These symbols refer to the symtab defined above + $10 + $11 + + // The $10 and $11 here _also_ refer to the symtab above because the + // new LST won't be applied until after this top-level expression. + (:values (:lst ["baz", "quux"]) $10 $11) + + // These refer to the new LST + $10 $11 + "#, + r#" + foo bar foo bar baz quux + "#, + ) + } + + #[test] + fn swap() -> IonResult<()> { + eval_template_invocation( + "(macro swap (x y) (values y x))", + r#" + [ + (:swap 1 2), + (:swap foo bar), + (:swap (:values 1 2 3) (:values 4 5 6)) + ] + "#, + r#" + [ + 2, 1, + bar, foo, + 4, 5, 6, 1, 2, 3, + ] + "#, + ) + } + + #[test] + fn new_yorkers() -> IonResult<()> { + eval_template_invocation( + r#" + (macro new_yorker (first last) + { + name: { + first: first, + last: last, + }, + state: "New York", + country: "USA" + } + ) + "#, + r#" + [ + (:new_yorker "Aaron" "Aaronson"), + (:new_yorker "Bettie" "Benowitz"), + (:new_yorker "Carol" "Canterbury"), + ] + "#, + r#" + [ + { + name: { + first: "Aaron", + last: "Aaronson", + }, + state: "New York", + country: "USA" + }, + { + name: { + first: "Bettie", + last: "Benowitz", + }, + state: "New York", + country: "USA" + }, + { + name: { + first: "Carol", + last: "Canterbury", + }, + state: "New York", + country: "USA" + } + ] + "#, + ) + } + + #[test] + fn application_log_event() -> IonResult<()> { + eval_template_invocation( + // Template definition + r#" + (macro event (timestamp thread_id thread_name client_num host_id parameters) + { + 'timestamp': timestamp, + 'threadId': thread_id, + 'threadName': (make_string "scheduler-thread-" thread_name), + 'loggerName': "com.example.organization.product.component.ClassName", + 'logLevel': (quote INFO), + 'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}", + 'parameters': [ + "SUCCESS", + (make_string "example-client-" client_num), + (make_string "aws-us-east-5f-" host_id), + parameters + ] + } + ) + "#, + // Template invocation + r#" + (:event + 1670446800245 + 418 + "6" + "1" + "18b4fa" + (:values + "region 4" + "2022-12-07T20:59:59.744000Z")) + "#, + // Equivalent output + r#" + { + 'timestamp': 1670446800245, + 'threadId': 418, + 'threadName': "scheduler-thread-6", + 'loggerName': "com.example.organization.product.component.ClassName", + 'logLevel': INFO, + 'format': "Request status: {} Client ID: {} Client Host: {} Client Region: {} Timestamp: {}", + 'parameters': [ + "SUCCESS", + "example-client-1", + "aws-us-east-5f-18b4fa", + "region 4", + "2022-12-07T20:59:59.744000Z", + ] + } + "#, + ) + } + + #[test] + fn annotated_template_value() -> IonResult<()> { + eval_template_invocation( + "(macro foo () bar::baz::quux::5)", + r#" + (:foo) + "#, + r#" + bar::baz::quux::5 + "#, + ) + } + #[test] fn values_tdl_macro_invocation() -> IonResult<()> { - eval_tdl_template_invocation( - r"(values 1 2 (values 3 4 (values 5 6) 7 8) 9 10)", + eval_template_invocation( + r"(macro foo () (values 1 2 (values 3 4 (values 5 6) 7 8) 9 10))", + "(:foo)", "1 2 3 4 5 6 7 8 9 10", ) } @@ -686,7 +1026,11 @@ mod tests { #[test] fn void_tdl_macro_invocation() -> IonResult<()> { - eval_tdl_template_invocation(r"(values (void) (void) (void))", "/* nothing */") + eval_template_invocation( + r"(macro foo () (values (void) (void) (void)))", + "(:foo)", + "/* nothing */", + ) } #[test] @@ -707,14 +1051,16 @@ mod tests { #[test] fn make_string_tdl_macro_invocation() -> IonResult<()> { let invocation = r#" - (values + (macro foo () + (values (make_string "foo" '''bar''' "\x62\u0061\U0000007A") (make_string '''Hello''' ''', ''' "world!")) + ) "#; - eval_tdl_template_invocation(invocation, r#" "foobarbaz" "Hello, world!" "#) + eval_template_invocation(invocation, "(:foo)", r#" "foobarbaz" "Hello, world!" "#) } #[test] @@ -728,8 +1074,9 @@ mod tests { #[test] fn macros_inside_a_tdl_list() -> IonResult<()> { - eval_tdl_template_invocation( + eval_template_invocation( r#" + (macro foo () (values [ 1, 2, @@ -741,7 +1088,9 @@ mod tests { (make_string "foo" "bar" "baz"), 7 ]) + ) "#, + "(:foo)", "[1, 2, 3, 4, 5, 6, \"foobarbaz\", 7]", )?; Ok(()) @@ -757,8 +1106,7 @@ mod tests { } // TODO: macros_inside_a_tdl_sexp() - // This requires an implementation of TDL's `(make_sexp)` or `(quote)`. Without these, - // a sexp is always considered a TDL macro invocation. + // This requires an implementation of TDL's `(make_sexp)`. #[test] fn e_expressions_inside_a_struct() -> IonResult<()> { @@ -811,8 +1159,9 @@ mod tests { #[test] fn macros_inside_a_tdl_struct() -> IonResult<()> { - eval_tdl_template_invocation( + eval_template_invocation( r#" + (macro foo () (values { a: 1, @@ -839,7 +1188,9 @@ mod tests { g: 6 }) + ) "#, + "(:foo)", r#" { a: 1, @@ -855,6 +1206,7 @@ mod tests { }, g: 6, } + "#, )?; Ok(()) diff --git a/src/lazy/expanded/macro_table.rs b/src/lazy/expanded/macro_table.rs index 396a519a..c5eba3d1 100644 --- a/src/lazy/expanded/macro_table.rs +++ b/src/lazy/expanded/macro_table.rs @@ -1,15 +1,21 @@ use std::collections::HashMap; +use crate::lazy::expanded::template::{TemplateMacro, TemplateMacroRef}; +use crate::lazy::text::raw::v1_1::reader::{MacroAddress, MacroIdRef}; +use crate::result::IonFailure; +use crate::IonResult; + /// The kinds of macros supported by /// [`MacroEvaluator`](crate::lazy::expanded::macro_evaluator::MacroEvaluator). /// This list parallels /// [`MacroExpansionKind`](crate::lazy::expanded::macro_evaluator::MacroExpansionKind), /// but its variants do not hold any associated state. -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone)] pub enum MacroKind { Void, Values, MakeString, + Template(TemplateMacro), } impl MacroKind { @@ -18,16 +24,46 @@ impl MacroKind { MacroKind::Void => "void", MacroKind::Values => "values", MacroKind::MakeString => "make_string", + MacroKind::Template(template) => template.name(), } } } +#[derive(Debug, Copy, Clone)] +pub struct MacroRef<'top> { + address: MacroAddress, + kind: &'top MacroKind, +} + +impl<'top> MacroRef<'top> { + pub fn new(address: MacroAddress, kind: &'top MacroKind) -> Self { + Self { address, kind } + } + pub fn address(&self) -> MacroAddress { + self.address + } + pub fn kind(&self) -> &'top MacroKind { + self.kind + } + + pub fn expect_template(self) -> IonResult> { + if let MacroKind::Template(template) = &self.kind { + return Ok(TemplateMacroRef::new(self.address, template)); + } + IonResult::decoding_error(format!( + "expected a template macro but found {:?}", + self.kind + )) + } +} + /// Allows callers to resolve a macro ID (that is: name or address) to a [`MacroKind`], confirming /// its validity and allowing evaluation to begin. #[derive(Debug)] pub struct MacroTable { macros_by_address: Vec, - macros_by_name: HashMap, + // Maps names to an address that can be used to query the Vec above. + macros_by_name: HashMap, } impl Default for MacroTable { @@ -39,9 +75,9 @@ impl Default for MacroTable { impl MacroTable { pub fn new() -> Self { let macros_by_id = vec![MacroKind::Void, MacroKind::Values, MacroKind::MakeString]; - let mut macros_by_name = HashMap::new(); - for kind in ¯os_by_id { - macros_by_name.insert(kind.name().to_string(), *kind); + let mut macros_by_name = HashMap::default(); + for (id, kind) in macros_by_id.iter().enumerate() { + macros_by_name.insert(kind.name().to_string(), id); } Self { macros_by_address: macros_by_id, @@ -49,11 +85,36 @@ impl MacroTable { } } - pub fn macro_at_address(&self, id: usize) -> Option<&MacroKind> { - self.macros_by_address.get(id) + pub fn macro_with_id(&'_ self, id: MacroIdRef<'_>) -> Option> { + match id { + MacroIdRef::LocalName(name) => self.macro_with_name(name), + MacroIdRef::LocalAddress(address) => self.macro_at_address(address), + } + } + + pub fn macro_at_address(&self, address: usize) -> Option> { + let kind = self.macros_by_address.get(address)?; + Some(MacroRef { address, kind }) } - pub fn macro_with_name(&self, name: &str) -> Option<&MacroKind> { - self.macros_by_name.get(name) + pub fn address_for_name(&self, name: &str) -> Option { + self.macros_by_name.get(name).copied() + } + + pub fn macro_with_name(&self, name: &str) -> Option> { + let address = *self.macros_by_name.get(name)?; + let kind = self.macros_by_address.get(address)?; + Some(MacroRef { address, kind }) + } + + pub fn add_macro(&mut self, template: TemplateMacro) -> IonResult { + let name = template.name(); + if self.macros_by_name.contains_key(name) { + return IonResult::decoding_error(format!("macro named '{name}' already exists")); + } + let id = self.macros_by_address.len(); + self.macros_by_name.insert(name.to_owned(), id); + self.macros_by_address.push(MacroKind::Template(template)); + Ok(id) } } diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index 4e8591b5..fa6bf288 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -32,10 +32,10 @@ //! Leaving symbol tokens unresolved is an optimization; annotations, field names, and symbol values //! that are ignored by the reader do not incur the cost of symbol table resolution. +use std::cell::{Cell, UnsafeCell}; use std::fmt::{Debug, Formatter}; use std::iter::empty; -use bumpalo::collections::Vec as BumpVec; use bumpalo::Bump as BumpAllocator; use sequence::{LazyExpandedList, LazyExpandedSExp}; @@ -44,36 +44,38 @@ use crate::element::iterators::SymbolsIterator; use crate::lazy::bytes_ref::BytesRef; use crate::lazy::decoder::{LazyDecoder, LazyRawReader, LazyRawValue}; use crate::lazy::encoding::RawValueLiteral; -use crate::lazy::expanded::macro_evaluator::EExpEvaluator; +use crate::lazy::expanded::compiler::TemplateCompiler; +use crate::lazy::expanded::macro_evaluator::{MacroEvaluator, RawEExpression}; use crate::lazy::expanded::macro_table::MacroTable; use crate::lazy::expanded::r#struct::LazyExpandedStruct; +use crate::lazy::expanded::sequence::Environment; +use crate::lazy::expanded::template::{TemplateElement, TemplateMacro, TemplateValue}; use crate::lazy::r#struct::LazyStruct; -use crate::lazy::raw_stream_item::RawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::sequence::{LazyList, LazySExp}; use crate::lazy::str_ref::StrRef; +use crate::lazy::system_reader::{LazySystemReader, PendingLst}; +use crate::lazy::system_stream_item::SystemStreamItem; +use crate::lazy::text::raw::v1_1::reader::MacroAddress; use crate::lazy::value::LazyValue; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::result::IonFailure; -use crate::{ - Decimal, Element, Int, IonResult, IonType, RawSymbolTokenRef, SymbolTable, Timestamp, Value, -}; +use crate::{Decimal, Int, IonResult, IonType, RawSymbolTokenRef, SymbolTable, Timestamp}; // All of these modules (and most of their types) are currently `pub` as the lazy reader is gated // behind an experimental feature flag. We may constrain access to them in the future as the code // stabilizes. +pub mod compiler; pub mod e_expression; pub mod macro_evaluator; pub mod macro_table; pub mod sequence; -pub mod stack; pub mod r#struct; -pub mod tdl_macro; pub mod template; /// A collection of resources that can be used to encode or decode Ion values. /// The `'top` lifetime associated with the [`EncodingContext`] reflects the fact that it can only -/// be used as long as the reader is positioned on the same top level value (i.e. the symbol and +/// be used as long as the reader is positioned on the same top level expression (i.e. the symbol and /// macro tables are guaranteed not to change). // It should be possible to loosen this definition of `'top` to include several top level values // as long as the macro and symbol tables do not change between them, though this would require @@ -105,22 +107,22 @@ impl<'top> EncodingContext<'top> { #[derive(Debug)] /// Stream components emitted by a LazyExpandingReader. These items may be encoded directly in the /// stream, or may have been produced by the evaluation of an encoding expression (e-expression). -pub enum ExpandedStreamItem<'top, 'data, D: LazyDecoder<'data>> { +pub enum ExpandedStreamItem<'top, D: LazyDecoder> { /// An Ion Version Marker (IVM) indicating the Ion major and minor version that were used to /// encode the values that follow. VersionMarker(u8, u8), /// An Ion value whose data has not yet been read. For more information about how to read its /// data and (in the case of containers) access any nested values, see the documentation /// for [`LazyRawBinaryValue`](crate::lazy::binary::raw::value::LazyRawBinaryValue). - Value(LazyExpandedValue<'top, 'data, D>), + Value(LazyExpandedValue<'top, D>), /// The end of the stream EndOfStream, } -impl<'top, 'data, D: LazyDecoder<'data>> ExpandedStreamItem<'top, 'data, D> { +impl<'top, D: LazyDecoder> ExpandedStreamItem<'top, D> { /// Returns an error if this stream item is a version marker or the end of the stream. /// Otherwise, returns the lazy value it contains. - fn expect_value(&self) -> IonResult<&LazyExpandedValue<'top, 'data, D>> { + fn expect_value(&self) -> IonResult<&LazyExpandedValue<'top, D>> { match self { ExpandedStreamItem::Value(value) => Ok(value), _ => IonResult::decoding_error(format!("Expected a value, but found a {:?}", self)), @@ -130,129 +132,377 @@ impl<'top, 'data, D: LazyDecoder<'data>> ExpandedStreamItem<'top, 'data, D> { /// A reader that evaluates macro invocations in the data stream and surfaces the resulting /// raw values to the caller. -pub struct LazyExpandingReader<'data, D: LazyDecoder<'data>> { - raw_reader: D::Reader, - evaluator: EExpEvaluator<'data, D>, +pub struct LazyExpandingReader<'data, D: LazyDecoder> { + raw_reader: UnsafeCell>, + // The expanding raw reader needs to be able to return multiple values from a single expression. + // For example, if the raw reader encounters this e-expression: + // + // (:values foo bar baz) + // + // then the expanding reader will need to yield a `foo` on the first call to `next()`, a + // `bar` on the second, and a `baz` on the third. + // + // A natural way to model this in Rust would be to surface an `Expr` type to the user and allow + // them to iterate over the values in its expansion. However, E-expressions are an encoding + // detail; we do not want them to impact the application-layer APIs for reading an Ion stream. + // As such, we need to instead store internal state that persists across an indefinite number + // of calls to `next()`. + // + // The `EncodingContext` passed as an argument to each call to `next()` provides a bump allocator + // whose storage is guaranteed to remain available as long as the reader remains on the same + // top-level expression. When an e-expression is encountered in the data stream, we can store a + // MacroEvaluator there until the reader advances to the next top-level expression. However, + // there is not a lifetime we can use that meets our use case; `'data`--the duration of the + // &[u8] from which we're reading--is too long, and `'top`--the duration of the current call + // to `next()`--is too short. + // + // Instead, we can hold a pointer to the active MacroEvaluator in the bump allocator when one + // is in use. Each time that `next()` is called with the `'top` lifetime, we will dereference + // the pointer and coerce the result into a `&'top mut MacroEvaluator`, allowing the value it + // yields that can be used until `next()` is called again. + // + // Because there is not valid lifetime we can use for the type `*mut MacroEvaluator<'lifetime>`, + // in the field below, we cast away the pointer's type for the purposes of storage and then cast + // it back at dereference time when a 'top lifetime is available. + evaluator_ptr: Cell>, + + // XXX: The `UnsafeCell` wrappers around the fields below are a workaround for + // a limitation in rustc's borrow checker that prevents mutable references from being + // conditionally returned in a loop. + // + // See: https://github.com/rust-lang/rust/issues/70255 + // + // There is a rustc fix for this limitation on the horizon. + // + // See: https://smallcultfollowing.com/babysteps/blog/2023/09/22/polonius-part-1/ + // + // Indeed, using the experimental `-Zpolonius` flag on the nightly compiler allows the + // version of this code without `unsafe` types to work. The alternative to the + // hack is wrapping each field in something like `RefCell`, which adds a small amount of + // overhead to each access. Given that this is the hottest path in the code and that a + // fix is inbound, I think this use of `unsafe` is warranted for now. + // + // Holds information found in symbol tables and encoding directives (TODO) that can be applied + // to the encoding context the next time the reader is between top-level expressions. + pending_lst: UnsafeCell, + // A bump allocator that is cleared between top-level expressions. + allocator: UnsafeCell, + // TODO: Make the symbol and macro tables traits on `D` such that they can be configured + // statically. Then 1.0 types can use `Never` for the macro table. + symbol_table: UnsafeCell, + macro_table: UnsafeCell, } -impl<'data, D: LazyDecoder<'data>> LazyExpandingReader<'data, D> { - pub(crate) fn new(raw_reader: D::Reader) -> Self { +impl<'data, D: LazyDecoder> LazyExpandingReader<'data, D> { + pub(crate) fn new(raw_reader: D::Reader<'data>) -> Self { Self { - raw_reader, - evaluator: EExpEvaluator::new(), + raw_reader: raw_reader.into(), + evaluator_ptr: None.into(), + allocator: BumpAllocator::new().into(), + pending_lst: PendingLst::new().into(), + symbol_table: SymbolTable::new().into(), + macro_table: MacroTable::new().into(), } } - /// Returns the next [`ExpandedStreamItem`] either by continuing to evaluate a macro invocation - /// in progress or by pulling a value from the input stream. - pub fn next<'top>( - &mut self, - context: EncodingContext<'top>, - ) -> IonResult> + // TODO: This method is temporary. It will be removed when the ability to read 1.1 encoding + // directives from the input stream is available. Until then, template creation is manual. + pub fn register_template(&mut self, template_definition: &str) -> IonResult { + let template_macro: TemplateMacro = + { TemplateCompiler::compile_from_text(self.context(), template_definition)? }; + + let macro_table = self.macro_table.get_mut(); + macro_table.add_macro(template_macro) + } + + fn context(&self) -> EncodingContext<'_> { + // SAFETY: The only time that the macro table, symbol table, and allocator can be modified + // is in the body of the method `between_top_level_expressions`. As long as nothing holds + // a reference to the `EncodingContext` we create here when that method is running, + // this is safe. + unsafe { + EncodingContext::new( + &*self.macro_table.get(), + &*self.symbol_table.get(), + &*self.allocator.get(), + ) + } + } + + fn ptr_to_mut_ref<'a, T>(ptr: *mut ()) -> &'a mut T { + let typed_ptr: *mut T = ptr.cast(); + unsafe { &mut *typed_ptr } + } + + /// Dereferences a raw pointer storing the address of the active MacroEvaluator. + fn ptr_to_evaluator<'top>(evaluator_ptr: *mut ()) -> &'top mut MacroEvaluator<'top, D> { + Self::ptr_to_mut_ref(evaluator_ptr) + } + + fn ref_as_ptr(reference: &mut T) -> *mut () { + let ptr: *mut T = reference; + let untyped_ptr: *mut () = ptr.cast(); + untyped_ptr + } + + /// Converts a mutable reference to the active MacroEvaluator into a raw, untyped pointer. + fn evaluator_to_ptr(evaluator: &mut MacroEvaluator<'_, D>) -> *mut () { + Self::ref_as_ptr(evaluator) + } + + /// Updates the encoding context with the information stored in the `PendingLst`. + // TODO: This only works on Ion 1.0 symbol tables for now, hence the name `PendingLst` + fn apply_pending_lst(pending_lst: &mut PendingLst, symbol_table: &mut SymbolTable) { + // If the symbol table's `imports` field had a value of `$ion_symbol_table`, then we're + // appending the symbols it defined to the end of our existing local symbol table. + // Otherwise, we need to clear the existing table before appending the new symbols. + if !pending_lst.is_lst_append { + // We're setting the symbols list, not appending to it. + symbol_table.reset(); + } + // `drain()` empties the pending symbols list + for symbol in pending_lst.symbols.drain(..) { + symbol_table.intern_or_add_placeholder(symbol); + } + pending_lst.is_lst_append = false; + pending_lst.has_changes = false; + } + + /// Inspects a `LazyExpandedValue` to determine whether it is a symbol table or an + /// application-level value. Returns it as the appropriate variant of `SystemStreamItem`. + fn interpret_value<'top>( + &self, + value: LazyExpandedValue<'top, D>, + ) -> IonResult> { + // If this value is a symbol table... + if LazySystemReader::is_symbol_table_struct(&value)? { + // ...traverse it and record any new symbols in our `pending_lst`. + let pending_lst = unsafe { &mut *self.pending_lst.get() }; + LazySystemReader::process_symbol_table(pending_lst, &value)?; + pending_lst.has_changes = true; + let lazy_struct = LazyStruct { + expanded_struct: value.read()?.expect_struct().unwrap(), + }; + return Ok(SystemStreamItem::SymbolTable(lazy_struct)); + } + // Otherwise, it's an application value. + let lazy_value = LazyValue::new(value); + return Ok(SystemStreamItem::Value(lazy_value)); + } + + /// This method is invoked just before the reader begins reading the next top-level expression + /// from the data stream. It is NOT invoked between multiple top level _values_ coming from a + /// single expression. + /// + /// This is the reader's opportunity to make any pending changes to the encoding context. + fn between_top_level_expressions(&self) { + // SAFETY: This is the only place where we modify the encoding context. Take care not to + // alias the allocator, symbol table, or macro table in this scope. + + // We're going to clear the bump allocator, so drop our reference to the evaluator that + // lives there. + self.evaluator_ptr.set(None); + + // Clear the allocator. + let allocator: &mut BumpAllocator = unsafe { &mut *self.allocator.get() }; + allocator.reset(); + + // If the pending LST has changes to apply, do so. + let pending_lst: &mut PendingLst = unsafe { &mut *self.pending_lst.get() }; + if pending_lst.has_changes { + let symbol_table: &mut SymbolTable = unsafe { &mut *self.symbol_table.get() }; + Self::apply_pending_lst(pending_lst, symbol_table); + } + } + + /// Returns the next application-level value. + /// + /// This method will consume and process as many system-level values as possible until it + /// encounters an application-level value or the end of the stream. + pub fn next_value(&mut self) -> IonResult>> { + loop { + match self.next_item()? { + SystemStreamItem::VersionMarker(_, _) => { + // TODO: Handle version changes 1.0 <-> 1.1 + } + SystemStreamItem::SymbolTable(_) => { + // The symbol table is processed by `next_item` before it is returned. There's + // nothing to be done here. + } + SystemStreamItem::Value(value) => return Ok(Some(value)), + SystemStreamItem::EndOfStream => return Ok(None), + } + } + } + + /// Returns the next [`SystemStreamItem`] either by continuing to evaluate a macro invocation + /// in progress or by pulling another expression from the input stream. + pub fn next_item<'top>(&'top self) -> IonResult> where 'data: 'top, { - loop { - if self.evaluator.stack_depth() > 0 { - // If the evaluator still has macro expansions in its stack, we need to give it the - // opportunity to produce the next value. - match self.evaluator.next(context, 0) { - Ok(Some(value)) => return Ok(ExpandedStreamItem::Value(value)), - Ok(None) => { - // While the evaluator had macros in its stack, they did not produce any more - // values. The stack is now empty. - } - Err(e) => return Err(e), - }; - } + // If there's already an active macro evaluator, that means the reader is still in the process + // of expanding a macro invocation it previously encountered. See if it has a value to give us. + if let Some(stream_item) = self.next_from_evaluator()? { + return Ok(stream_item); + } - // If we reach this point, the evaluator's macro stack is empty. We'll pull another - // expression from the input stream. - use RawStreamItem::*; - let expanded_item = match self.raw_reader.next()? { - VersionMarker(major, minor) => ExpandedStreamItem::VersionMarker(major, minor), + // Otherwise, we're now between top level expressions. Take this opportunity to apply any + // pending changes to the encoding context and reset state as needed. + self.between_top_level_expressions(); + + // See if the raw reader can get another expression from the input stream. It's possible + // to find an expression that yields no values (for example: `(:void)`), so we perform this + // step in a loop until we get a value or end-of-stream. + + let allocator: &BumpAllocator = unsafe { &*self.allocator.get() }; + loop { + // Pull another top-level expression from the input stream if one is available. + use crate::lazy::raw_stream_item::RawStreamItem::*; + let raw_reader = unsafe { &mut *self.raw_reader.get() }; + match raw_reader.next(allocator)? { + VersionMarker(major, minor) => { + return Ok(SystemStreamItem::VersionMarker(major, minor)) + } // We got our value; return it. - Value(raw_value) => ExpandedStreamItem::Value(LazyExpandedValue { - source: ExpandedValueSource::ValueLiteral(raw_value), - context, - }), + Value(raw_value) => { + let value = LazyExpandedValue { + source: ExpandedValueSource::ValueLiteral(raw_value), + context: self.context(), + }; + return self.interpret_value(value); + } // It's another macro invocation, we'll start evaluating it. EExpression(e_exp) => { + let context = self.context(); + let resolved_e_exp = e_exp.resolve(context)?; + // Get the current evaluator or make a new one + let evaluator = match self.evaluator_ptr.get() { + // If there's already an evaluator, dereference the pointer. + Some(ptr) => Self::ptr_to_evaluator(ptr), + // If there's not, make a new one. + None => context + .allocator + // E-expressions always have an empty environment + .alloc_with(move || MacroEvaluator::new(context, Environment::empty())), + }; // Push the invocation onto the evaluation stack. - self.evaluator.push(context, e_exp)?; - // Return to the top of the loop to pull the next value (if any) from the evaluator. - continue; + evaluator.push(context, resolved_e_exp)?; + self.evaluator_ptr + .set(Some(Self::evaluator_to_ptr(evaluator))); + + // Try to get a value by starting to evaluate the e-expression. + if let Some(value) = self.next_from_evaluator()? { + // If we get a value, return it. + return Ok(value); + } else { + // If the expression was equivalent to `(:void)`, return to the top of + // the loop and get the next expression. + continue; + } } - EndOfStream => ExpandedStreamItem::EndOfStream, + EndOfStream => return Ok(SystemStreamItem::EndOfStream), }; - return Ok(expanded_item); + } + } + + /// If there is not an evaluation in process, returns `Ok(None)`. + /// If there is an evaluation in process but it does not yield another value, returns `Ok(None)`. + /// If there is an evaluation in process and it yields another value, returns `Ok(Some(value))`. + /// Otherwise, returns `Err`. + fn next_from_evaluator(&self) -> IonResult>> { + let evaluator_ptr = match self.evaluator_ptr.get() { + // There's not currently an evaluator. + None => return Ok(None), + // There's an evaluator in the process of expanding a macro. + Some(ptr) => ptr, + }; + let evaluator = Self::ptr_to_evaluator(evaluator_ptr); + + match evaluator.next(self.context()) { + Ok(Some(value)) => { + // See if this value was a symbol table that needs interpretation. + self.interpret_value(value).map(Some) + } + Ok(None) => { + // While the evaluator had macros in its stack, they did not produce any more + // values. The stack is now empty. + Ok(None) + } + Err(e) => Err(e), } } } /// The source of data backing a [`LazyExpandedValue`]. -#[derive(Debug, Clone)] -pub enum ExpandedValueSource<'top, 'data, D: LazyDecoder<'data>> { +#[derive(Copy, Clone)] +pub enum ExpandedValueSource<'top, D: LazyDecoder> { /// This value was a literal in the input stream. - ValueLiteral(D::Value), + ValueLiteral(D::Value<'top>), /// This value was part of a template definition. - Template(&'top Element), - /// This value was the computed result of a macro invocation like `(:make_string ...)`. + Template(Environment<'top, D>, TemplateElement<'top>), + /// This value was the computed result of a macro invocation like `(:make_string `...)`. Constructed( // TODO: Make this an associated type on the LazyDecoder trait so 1.0 types can set // it to `Never` and the compiler can eliminate this code path where applicable. - ( - // A collection of bump-allocated annotation strings - BumpVec<'top, &'top str>, - ExpandedValueRef<'top, 'data, D>, - ), + // Constructed data stored in the bump allocator. Holding references instead of the data + // itself allows this type (and those that contain it) to impl `Copy`. + &'top [&'top str], // Annotations (if any) + &'top ExpandedValueRef<'top, D>, // Value ), } +impl<'top, D: LazyDecoder> Debug for ExpandedValueSource<'top, D> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self { + ExpandedValueSource::ValueLiteral(v) => write!(f, "{v:?}"), + ExpandedValueSource::Template(_, template_element) => { + write!(f, "{:?}", template_element.value()) + } + ExpandedValueSource::Constructed(_, value) => write!(f, "{value:?}"), + } + } +} + // Converts the raw value literal types associated with each format decoder (e.g. LazyRawTextValue_1_1) // into an ExpandedValueSource. -impl<'top, 'data, V: RawValueLiteral, D: LazyDecoder<'data, Value = V>> From - for ExpandedValueSource<'top, 'data, D> +impl<'top, V: RawValueLiteral, D: LazyDecoder = V>> From + for ExpandedValueSource<'top, D> { fn from(value: V) -> Self { ExpandedValueSource::ValueLiteral(value) } } -// Converts an Element from the body of a template into an ExpandedValueSource. -impl<'top, 'data, D: LazyDecoder<'data>> From<&'top Element> - for ExpandedValueSource<'top, 'data, D> -{ - fn from(element: &'top Element) -> Self { - ExpandedValueSource::Template(element) - } -} - /// A value produced by expanding the 'raw' view of the input data. -#[derive(Clone)] -pub struct LazyExpandedValue<'top, 'data, D: LazyDecoder<'data>> { +#[derive(Copy, Clone)] +pub struct LazyExpandedValue<'top, D: LazyDecoder> { pub(crate) context: EncodingContext<'top>, - pub(crate) source: ExpandedValueSource<'top, 'data, D>, + pub(crate) source: ExpandedValueSource<'top, D>, } -impl<'top, 'data, D: LazyDecoder<'data>> Debug for LazyExpandedValue<'top, 'data, D> { +impl<'top, D: LazyDecoder> Debug for LazyExpandedValue<'top, D> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{:?}", self.source) } } -impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyExpandedValue<'top, 'data, D> { - pub(crate) fn from_value(context: EncodingContext<'top>, value: D::Value) -> Self { +impl<'top, D: LazyDecoder> LazyExpandedValue<'top, D> { + pub(crate) fn from_value(context: EncodingContext<'top>, value: D::Value<'top>) -> Self { Self { context, source: ExpandedValueSource::ValueLiteral(value), } } - pub(crate) fn from_template(context: EncodingContext<'top>, element: &'top Element) -> Self { + pub(crate) fn from_template( + context: EncodingContext<'top>, + environment: Environment<'top, D>, + element: TemplateElement<'top>, + ) -> Self { Self { context, - source: ExpandedValueSource::Template(element), + source: ExpandedValueSource::Template(environment, element), } } @@ -260,8 +510,8 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyExpandedValue<'top, 'data, D> use ExpandedValueSource::*; match &self.source { ValueLiteral(value) => value.ion_type(), - Template(element) => element.ion_type(), - Constructed((_annotations, value)) => value.ion_type(), + Template(_, element) => element.value().ion_type(), + Constructed(_annotations, value) => value.ion_type(), } } @@ -269,23 +519,23 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyExpandedValue<'top, 'data, D> use ExpandedValueSource::*; match &self.source { ValueLiteral(value) => value.is_null(), - Template(element) => element.is_null(), - Constructed((_annotations, value)) => { + Template(_, element) => element.value().is_null(), + Constructed(_, value) => { matches!(value, ExpandedValueRef::Null(_)) } } } - pub fn annotations(&self) -> ExpandedAnnotationsIterator<'top, 'data, D> { + pub fn annotations(&self) -> ExpandedAnnotationsIterator<'top, D> { use ExpandedValueSource::*; match &self.source { ValueLiteral(value) => ExpandedAnnotationsIterator::new( ExpandedAnnotationsSource::ValueLiteral(value.annotations()), ), - Template(element) => ExpandedAnnotationsIterator::new( - ExpandedAnnotationsSource::Template(element.annotations().iter()), + Template(_, element) => ExpandedAnnotationsIterator::new( + ExpandedAnnotationsSource::Template(SymbolsIterator::new(element.annotations())), ), - Constructed((_annotations, _value)) => { + Constructed(_annotations, _value) => { // TODO: iterate over constructed annotations // For now we return an empty iterator ExpandedAnnotationsIterator::new(ExpandedAnnotationsSource::Constructed(Box::new( @@ -295,12 +545,16 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyExpandedValue<'top, 'data, D> } } - pub fn read(&self) -> IonResult> { + pub fn read(&self) -> IonResult> { use ExpandedValueSource::*; match &self.source { ValueLiteral(value) => Ok(ExpandedValueRef::from_raw(self.context, value.read()?)), - Template(element) => Ok(ExpandedValueRef::from_template(element, self.context)), - Constructed((_annotations, value)) => Ok((*value).clone()), + Template(environment, element) => Ok(ExpandedValueRef::from_template( + self.context, + *environment, + element, + )), + Constructed(_annotations, value) => Ok((*value).clone()), } } @@ -309,58 +563,48 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyExpandedValue<'top, 'data, D> } } -impl<'top, 'data, D: LazyDecoder<'data>> From> - for LazyValue<'top, 'data, D> -{ - fn from(expanded_value: LazyExpandedValue<'top, 'data, D>) -> Self { +impl<'top, D: LazyDecoder> From> for LazyValue<'top, D> { + fn from(expanded_value: LazyExpandedValue<'top, D>) -> Self { LazyValue { expanded_value } } } -impl<'top, 'data, D: LazyDecoder<'data>> From> - for LazyStruct<'top, 'data, D> -{ - fn from(expanded_struct: LazyExpandedStruct<'top, 'data, D>) -> Self { +impl<'top, D: LazyDecoder> From> for LazyStruct<'top, D> { + fn from(expanded_struct: LazyExpandedStruct<'top, D>) -> Self { LazyStruct { expanded_struct } } } -impl<'top, 'data, D: LazyDecoder<'data>> From> - for LazySExp<'top, 'data, D> -{ - fn from(expanded_sexp: LazyExpandedSExp<'top, 'data, D>) -> Self { +impl<'top, D: LazyDecoder> From> for LazySExp<'top, D> { + fn from(expanded_sexp: LazyExpandedSExp<'top, D>) -> Self { LazySExp { expanded_sexp } } } -impl<'top, 'data, D: LazyDecoder<'data>> From> - for LazyList<'top, 'data, D> -{ - fn from(expanded_list: LazyExpandedList<'top, 'data, D>) -> Self { +impl<'top, D: LazyDecoder> From> for LazyList<'top, D> { + fn from(expanded_list: LazyExpandedList<'top, D>) -> Self { LazyList { expanded_list } } } -pub enum ExpandedAnnotationsSource<'top, 'data, D: LazyDecoder<'data>> { - ValueLiteral(D::AnnotationsIterator), +pub enum ExpandedAnnotationsSource<'top, D: LazyDecoder> { + ValueLiteral(D::AnnotationsIterator<'top>), Template(SymbolsIterator<'top>), // TODO: This is a placeholder impl and always returns an empty iterator Constructed(Box>> + 'top>), } -pub struct ExpandedAnnotationsIterator<'top, 'data, D: LazyDecoder<'data>> { - source: ExpandedAnnotationsSource<'top, 'data, D>, +pub struct ExpandedAnnotationsIterator<'top, D: LazyDecoder> { + source: ExpandedAnnotationsSource<'top, D>, } -impl<'top, 'data, D: LazyDecoder<'data>> ExpandedAnnotationsIterator<'top, 'data, D> { - pub fn new(source: ExpandedAnnotationsSource<'top, 'data, D>) -> Self { +impl<'top, D: LazyDecoder> ExpandedAnnotationsIterator<'top, D> { + pub fn new(source: ExpandedAnnotationsSource<'top, D>) -> Self { Self { source } } } -impl<'top, 'data: 'top, D: LazyDecoder<'data>> Iterator - for ExpandedAnnotationsIterator<'top, 'data, D> -{ +impl<'top, D: LazyDecoder> Iterator for ExpandedAnnotationsIterator<'top, D> { type Item = IonResult>; fn next(&mut self) -> Option { @@ -381,7 +625,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> Iterator // hold a 'top reference to a bump allocation instead of a static reference to a heap allocation. // This will enable us to remove several calls to `clone()`, which can be much slower than copies. #[derive(Clone)] -pub enum ExpandedValueRef<'top, 'data, D: LazyDecoder<'data>> { +pub enum ExpandedValueRef<'top, D: LazyDecoder> { Null(IonType), Bool(bool), Int(Int), @@ -392,12 +636,12 @@ pub enum ExpandedValueRef<'top, 'data, D: LazyDecoder<'data>> { Symbol(RawSymbolTokenRef<'top>), Blob(BytesRef<'top>), Clob(BytesRef<'top>), - SExp(LazyExpandedSExp<'top, 'data, D>), - List(LazyExpandedList<'top, 'data, D>), - Struct(LazyExpandedStruct<'top, 'data, D>), + SExp(LazyExpandedSExp<'top, D>), + List(LazyExpandedList<'top, D>), + Struct(LazyExpandedStruct<'top, D>), } -impl<'top, 'data: 'top, D: LazyDecoder<'data>> PartialEq for ExpandedValueRef<'top, 'data, D> { +impl<'top, D: LazyDecoder> PartialEq for ExpandedValueRef<'top, D> { fn eq(&self, other: &Self) -> bool { use ExpandedValueRef::*; match (self, other) { @@ -421,7 +665,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> PartialEq for ExpandedValueRef<'t } } -impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedValueRef<'top, 'data, D> { +impl<'top, D: LazyDecoder> ExpandedValueRef<'top, D> { fn expected(self, expected_name: &str) -> IonResult { IonResult::decoding_error(format!( "expected a(n) {} but found a {:?}", @@ -517,7 +761,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedValueRef<'top, 'data, D> } } - pub fn expect_list(self) -> IonResult> { + pub fn expect_list(self) -> IonResult> { if let ExpandedValueRef::List(s) = self { Ok(s) } else { @@ -525,7 +769,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedValueRef<'top, 'data, D> } } - pub fn expect_sexp(self) -> IonResult> { + pub fn expect_sexp(self) -> IonResult> { if let ExpandedValueRef::SExp(s) = self { Ok(s) } else { @@ -533,7 +777,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedValueRef<'top, 'data, D> } } - pub fn expect_struct(self) -> IonResult> { + pub fn expect_struct(self) -> IonResult> { if let ExpandedValueRef::Struct(s) = self { Ok(s) } else { @@ -541,7 +785,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedValueRef<'top, 'data, D> } } - fn from_raw(context: EncodingContext<'top>, value: RawValueRef<'data, D>) -> Self { + fn from_raw(context: EncodingContext<'top>, value: RawValueRef<'top, D>) -> Self { use RawValueRef::*; match value { Null(ion_type) => ExpandedValueRef::Null(ion_type), @@ -561,7 +805,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedValueRef<'top, 'data, D> } } -impl<'top, 'data, D: LazyDecoder<'data>> Debug for ExpandedValueRef<'top, 'data, D> { +impl<'top, D: LazyDecoder> Debug for ExpandedValueRef<'top, D> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { use ExpandedValueRef::*; match self { @@ -583,9 +827,13 @@ impl<'top, 'data, D: LazyDecoder<'data>> Debug for ExpandedValueRef<'top, 'data, } } -impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedValueRef<'top, 'data, D> { - fn from_template(element: &'top Element, context: EncodingContext<'top>) -> Self { - use Value::*; +impl<'top, D: LazyDecoder> ExpandedValueRef<'top, D> { + fn from_template( + context: EncodingContext<'top>, + environment: Environment<'top, D>, + element: &TemplateElement<'top>, + ) -> Self { + use TemplateValue::*; match element.value() { Null(ion_type) => ExpandedValueRef::Null(*ion_type), Bool(b) => ExpandedValueRef::Bool(*b), @@ -599,18 +847,24 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedValueRef<'top, 'data, D> Clob(c) => ExpandedValueRef::Clob(BytesRef::from(c.as_ref())), List(s) => ExpandedValueRef::List(LazyExpandedList::from_template( context, - element.annotations(), - s, + environment, + element.template(), + element.annotations_range(), + *s, )), SExp(s) => ExpandedValueRef::SExp(LazyExpandedSExp::from_template( context, - element.annotations(), - s, + environment, + element.template(), + element.annotations_range(), + *s, )), Struct(s) => ExpandedValueRef::Struct(LazyExpandedStruct::from_template( context, - element.annotations(), - s, + environment, + element.template(), + element.annotations_range(), + *s, )), } } diff --git a/src/lazy/expanded/sequence.rs b/src/lazy/expanded/sequence.rs index 2d350ee3..79e8fcd0 100644 --- a/src/lazy/expanded/sequence.rs +++ b/src/lazy/expanded/sequence.rs @@ -1,40 +1,110 @@ +use bumpalo::collections::Vec as BumpVec; + +use crate::element::iterators::SymbolsIterator; use crate::lazy::decoder::{LazyDecoder, LazyRawSequence, LazyRawValueExpr, RawValueExpr}; -use crate::lazy::expanded::macro_evaluator::{TransientEExpEvaluator, TransientTdlMacroEvaluator}; -use crate::lazy::expanded::template::TemplateSequenceIterator; +use crate::lazy::expanded::macro_evaluator::{MacroEvaluator, RawEExpression, ValueExpr}; +use crate::lazy::expanded::template::{ + AnnotationsRange, ExprRange, TemplateMacroRef, TemplateSequenceIterator, +}; use crate::lazy::expanded::{ EncodingContext, ExpandedAnnotationsIterator, ExpandedAnnotationsSource, ExpandedValueSource, LazyExpandedValue, }; -use crate::{Annotations, IonResult, IonType, Sequence}; +use crate::result::IonFailure; +use crate::{IonError, IonResult, IonType}; + +/// A sequence of not-yet-evaluated expressions passed as arguments to a macro invocation. +/// +/// The number of expressions is required to match the number of parameters in the macro's signature, +/// and the order of the expressions corresponds to the order of the parameters. +/// +/// For example, given this macro definition: +/// ```ion_1_1 +/// (macro foo (x y z) [x, y, z]) +/// ``` +/// and this invocation: +/// ```ion_1_1 +/// (:foo 1 2 (:values 3)) +/// ``` +/// The `Environment` would contain the expressions `1`, `2` and `3`, corresponding to parameters +/// `x`, `y`, and `z` respectively. +#[derive(Copy, Clone, Debug)] +pub struct Environment<'top, D: LazyDecoder> { + expressions: &'top [ValueExpr<'top, D>], +} + +impl<'top, D: LazyDecoder> Environment<'top, D> { + pub(crate) fn new(args: BumpVec<'top, ValueExpr<'top, D>>) -> Self { + Environment { + expressions: args.into_bump_slice(), + } + } + + /// Returns the expression for the corresponding signature index -- the variable's offset within + /// the template's signature. If the requested index is out of bounds, returns `Err`. + pub fn get_expected(&self, signature_index: usize) -> IonResult<&'top ValueExpr<'top, D>> { + self.expressions() + .get(signature_index) + // The TemplateCompiler should detect any invalid variable references prior to evaluation + .ok_or_else(|| { + IonError::decoding_error(format!( + "reference to variable with signature index {} not valid", + signature_index + )) + }) + } + + /// Returns an empty environment without performing any allocations. This is used for evaluating + /// e-expressions, which never have named parameters. + pub fn empty() -> Environment<'top, D> { + Environment { expressions: &[] } + } + pub fn expressions(&self) -> &'top [ValueExpr<'top, D>] { + self.expressions + } +} -#[derive(Clone)] -pub enum ExpandedListSource<'top, 'data, D: LazyDecoder<'data>> { - ValueLiteral(D::List), - Template(&'top Annotations, &'top Sequence), +/// The data source for a [`LazyExpandedList`]. +#[derive(Clone, Copy)] +pub enum ExpandedListSource<'top, D: LazyDecoder> { + /// The list was a value literal in the input stream. + ValueLiteral(D::List<'top>), + /// The list was part of a template definition. + Template( + Environment<'top, D>, + TemplateMacroRef<'top>, + AnnotationsRange, + ExprRange, + ), // TODO: Constructed } -#[derive(Clone)] -pub struct LazyExpandedList<'top, 'data, D: LazyDecoder<'data>> { +/// A list that may have come from either a value literal in the input stream or from evaluating +/// a template. +#[derive(Clone, Copy)] +pub struct LazyExpandedList<'top, D: LazyDecoder> { pub(crate) context: EncodingContext<'top>, - pub(crate) source: ExpandedListSource<'top, 'data, D>, + pub(crate) source: ExpandedListSource<'top, D>, } -impl<'top, 'data, D: LazyDecoder<'data>> LazyExpandedList<'top, 'data, D> { +impl<'top, D: LazyDecoder> LazyExpandedList<'top, D> { pub fn from_literal( context: EncodingContext<'top>, - list: D::List, - ) -> LazyExpandedList<'top, 'data, D> { + list: D::List<'top>, + ) -> LazyExpandedList<'top, D> { let source = ExpandedListSource::ValueLiteral(list); Self { source, context } } pub fn from_template( context: EncodingContext<'top>, - annotations: &'top Annotations, - sequence: &'top Sequence, - ) -> LazyExpandedList<'top, 'data, D> { - let source = ExpandedListSource::Template(annotations, sequence); + environment: Environment<'top, D>, + template: TemplateMacroRef<'top>, + annotations_range: AnnotationsRange, + step_range: ExprRange, + ) -> LazyExpandedList<'top, D> { + let source = + ExpandedListSource::Template(environment, template, annotations_range, step_range); Self { source, context } } @@ -42,28 +112,38 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyExpandedList<'top, 'data, D> { IonType::List } - pub fn annotations(&self) -> ExpandedAnnotationsIterator<'top, 'data, D> { - match self.source { + pub fn annotations(&self) -> ExpandedAnnotationsIterator<'top, D> { + match &self.source { ExpandedListSource::ValueLiteral(value) => ExpandedAnnotationsIterator { source: ExpandedAnnotationsSource::ValueLiteral(value.annotations()), }, - ExpandedListSource::Template(annotations, _sequence) => ExpandedAnnotationsIterator { - source: ExpandedAnnotationsSource::Template(annotations.iter()), - }, + ExpandedListSource::Template(_environment, template, annotations, _sequence) => { + let annotations = template + .body + .annotations_storage() + .get(annotations.ops_range()) + .unwrap(); + ExpandedAnnotationsIterator { + source: ExpandedAnnotationsSource::Template(SymbolsIterator::new(annotations)), + } + } } } - pub fn iter(&self) -> ExpandedListIterator<'top, 'data, D> { + pub fn iter(&self) -> ExpandedListIterator<'top, D> { let source = match &self.source { ExpandedListSource::ValueLiteral(list) => { - let evaluator = TransientEExpEvaluator::new_transient(self.context); + let evaluator = MacroEvaluator::new(self.context, Environment::empty()); ExpandedListIteratorSource::ValueLiteral(evaluator, list.iter()) } - ExpandedListSource::Template(_annotations, sequence) => { + ExpandedListSource::Template(environment, template, _annotations, steps) => { + let steps = template.body.expressions().get(steps.ops_range()).unwrap(); + let evaluator = MacroEvaluator::new(self.context, *environment); ExpandedListIteratorSource::Template(TemplateSequenceIterator::new( self.context, - TransientTdlMacroEvaluator::new_transient(self.context), - sequence, + evaluator, + *template, + steps, )) } }; @@ -74,24 +154,26 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyExpandedList<'top, 'data, D> { } } -pub enum ExpandedListIteratorSource<'top, 'data, D: LazyDecoder<'data>> { +/// The source of child values iterated over by an [`ExpandedListIterator`]. +pub enum ExpandedListIteratorSource<'top, D: LazyDecoder> { ValueLiteral( // Giving the list iterator its own evaluator means that we can abandon the iterator // at any time without impacting the evaluation state of its parent container. - TransientEExpEvaluator<'top, 'data, D>, - >::Iterator, + MacroEvaluator<'top, D>, + as LazyRawSequence<'top, D>>::Iterator, ), - Template(TemplateSequenceIterator<'top, 'data, D>), + Template(TemplateSequenceIterator<'top, D>), // TODO: Constructed } -pub struct ExpandedListIterator<'top, 'data, D: LazyDecoder<'data>> { +/// Iterates over the child values of a [`LazyExpandedList`]. +pub struct ExpandedListIterator<'top, D: LazyDecoder> { context: EncodingContext<'top>, - source: ExpandedListIteratorSource<'top, 'data, D>, + source: ExpandedListIteratorSource<'top, D>, } -impl<'top, 'data, D: LazyDecoder<'data>> Iterator for ExpandedListIterator<'top, 'data, D> { - type Item = IonResult>; +impl<'top, D: LazyDecoder> Iterator for ExpandedListIterator<'top, D> { + type Item = IonResult>; fn next(&mut self) -> Option { match &mut self.source { @@ -103,45 +185,65 @@ impl<'top, 'data, D: LazyDecoder<'data>> Iterator for ExpandedListIterator<'top, } } -#[derive(Clone)] -pub enum ExpandedSExpSource<'top, 'data, D: LazyDecoder<'data>> { - ValueLiteral(D::SExp), - Template(&'top Annotations, &'top Sequence), +/// The data source for a [`LazyExpandedSExp`]. +#[derive(Clone, Copy)] +pub enum ExpandedSExpSource<'top, D: LazyDecoder> { + /// The SExp was a value literal in the input stream. + ValueLiteral(D::SExp<'top>), + /// The SExp was part of a template definition. + Template( + Environment<'top, D>, + TemplateMacroRef<'top>, + AnnotationsRange, + ExprRange, + ), } -#[derive(Clone)] -pub struct LazyExpandedSExp<'top, 'data, D: LazyDecoder<'data>> { - pub(crate) source: ExpandedSExpSource<'top, 'data, D>, +/// An s-expression that may have come from either a value literal in the input stream or from +/// evaluating a template. +#[derive(Clone, Copy)] +pub struct LazyExpandedSExp<'top, D: LazyDecoder> { + pub(crate) source: ExpandedSExpSource<'top, D>, pub(crate) context: EncodingContext<'top>, } -impl<'top, 'data, D: LazyDecoder<'data>> LazyExpandedSExp<'top, 'data, D> { +impl<'top, D: LazyDecoder> LazyExpandedSExp<'top, D> { pub fn ion_type(&self) -> IonType { IonType::SExp } - pub fn annotations(&self) -> ExpandedAnnotationsIterator<'top, 'data, D> { - match self.source { + pub fn annotations(&self) -> ExpandedAnnotationsIterator<'top, D> { + match &self.source { ExpandedSExpSource::ValueLiteral(value) => ExpandedAnnotationsIterator { source: ExpandedAnnotationsSource::ValueLiteral(value.annotations()), }, - ExpandedSExpSource::Template(annotations, _sequence) => ExpandedAnnotationsIterator { - source: ExpandedAnnotationsSource::Template(annotations.iter()), - }, + ExpandedSExpSource::Template(_environment, template, annotations, _sequence) => { + let annotations = template + .body + .annotations_storage() + .get(annotations.ops_range()) + .unwrap(); + ExpandedAnnotationsIterator { + source: ExpandedAnnotationsSource::Template(SymbolsIterator::new(annotations)), + } + } } } - pub fn iter(&self) -> ExpandedSExpIterator<'top, 'data, D> { + pub fn iter(&self) -> ExpandedSExpIterator<'top, D> { let source = match &self.source { ExpandedSExpSource::ValueLiteral(sexp) => { - let evaluator = TransientEExpEvaluator::new_transient(self.context); + let evaluator = MacroEvaluator::new(self.context, Environment::empty()); ExpandedSExpIteratorSource::ValueLiteral(evaluator, sexp.iter()) } - ExpandedSExpSource::Template(_annotations, sequence) => { + ExpandedSExpSource::Template(environment, template, _annotations, steps) => { + let steps = template.body.expressions().get(steps.ops_range()).unwrap(); + let evaluator = MacroEvaluator::new(self.context, *environment); ExpandedSExpIteratorSource::Template(TemplateSequenceIterator::new( self.context, - TransientTdlMacroEvaluator::new_transient(self.context), - sequence, + evaluator, + *template, + steps, )) } }; @@ -153,40 +255,44 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyExpandedSExp<'top, 'data, D> { pub fn from_literal( context: EncodingContext<'top>, - sexp: D::SExp, - ) -> LazyExpandedSExp<'top, 'data, D> { + sexp: D::SExp<'top>, + ) -> LazyExpandedSExp<'top, D> { let source = ExpandedSExpSource::ValueLiteral(sexp); Self { source, context } } pub fn from_template( context: EncodingContext<'top>, - annotations: &'top Annotations, - sequence: &'top Sequence, - ) -> LazyExpandedSExp<'top, 'data, D> { - let source = ExpandedSExpSource::Template(annotations, sequence); + environment: Environment<'top, D>, + template: TemplateMacroRef<'top>, + annotations: AnnotationsRange, + expressions: ExprRange, + ) -> LazyExpandedSExp<'top, D> { + let source = ExpandedSExpSource::Template(environment, template, annotations, expressions); Self { source, context } } } -pub enum ExpandedSExpIteratorSource<'top, 'data, D: LazyDecoder<'data>> { +/// The source of child values iterated over by an [`ExpandedSExpIterator`]. +pub enum ExpandedSExpIteratorSource<'top, D: LazyDecoder> { ValueLiteral( // Giving the sexp iterator its own evaluator means that we can abandon the iterator // at any time without impacting the evaluation state of its parent container. - TransientEExpEvaluator<'top, 'data, D>, - >::Iterator, + MacroEvaluator<'top, D>, + as LazyRawSequence<'top, D>>::Iterator, ), - Template(TemplateSequenceIterator<'top, 'data, D>), + Template(TemplateSequenceIterator<'top, D>), // TODO: Constructed } -pub struct ExpandedSExpIterator<'top, 'data, D: LazyDecoder<'data>> { +/// Iterates over the child values of a [`LazyExpandedSExp`]. +pub struct ExpandedSExpIterator<'top, D: LazyDecoder> { context: EncodingContext<'top>, - source: ExpandedSExpIteratorSource<'top, 'data, D>, + source: ExpandedSExpIteratorSource<'top, D>, } -impl<'top, 'data, D: LazyDecoder<'data>> Iterator for ExpandedSExpIterator<'top, 'data, D> { - type Item = IonResult>; +impl<'top, D: LazyDecoder> Iterator for ExpandedSExpIterator<'top, D> { + type Item = IonResult>; fn next(&mut self) -> Option { match &mut self.source { @@ -200,15 +306,15 @@ impl<'top, 'data, D: LazyDecoder<'data>> Iterator for ExpandedSExpIterator<'top, /// For both lists and s-expressions, yields the next sequence value by either continuing a macro /// evaluation already in progress or reading the next item from the input stream. -fn expand_next_sequence_value<'top, 'data, D: LazyDecoder<'data>>( +fn expand_next_sequence_value<'top, D: LazyDecoder>( context: EncodingContext<'top>, - evaluator: &mut TransientEExpEvaluator<'top, 'data, D>, - iter: &mut impl Iterator>>, -) -> Option>> { + evaluator: &mut MacroEvaluator<'top, D>, + iter: &mut impl Iterator>>, +) -> Option>> { loop { // If the evaluator's stack is not empty, it's still expanding a macro. - if evaluator.stack_depth() > 0 { - let value = evaluator.next(context, 0).transpose(); + if evaluator.macro_stack_depth() > 0 { + let value = evaluator.next(context).transpose(); if value.is_some() { // The `Some` may contain a value or an error; either way, that's the next return value. return value; @@ -226,7 +332,11 @@ fn expand_next_sequence_value<'top, 'data, D: LazyDecoder<'data>>( })) } Some(Ok(RawValueExpr::MacroInvocation(invocation))) => { - let begin_expansion_result = evaluator.push(context, invocation); + let resolved_invocation = match invocation.resolve(context) { + Ok(resolved) => resolved, + Err(e) => return Some(Err(e)), + }; + let begin_expansion_result = evaluator.push(context, resolved_invocation); if let Err(e) = begin_expansion_result { return Some(Err(e)); } diff --git a/src/lazy/expanded/stack.rs b/src/lazy/expanded/stack.rs deleted file mode 100644 index f731e7bb..00000000 --- a/src/lazy/expanded/stack.rs +++ /dev/null @@ -1,72 +0,0 @@ -use bumpalo::collections::Vec as BumpVec; -use std::fmt::Debug; - -/// Backing storage for the [`MacroEvaluator`](crate::lazy::expanded::macro_evaluator::MacroEvaluator). -/// -/// This is implemented both by `Vec` (which has a static lifetime) and [`BumpVec`](bumpalo::collections::Vec), -/// which uses storage tied to the encoding context's lifetime. -pub trait Stack: Debug { - fn push(&mut self, value: T); - fn pop(&mut self) -> Option; - - fn peek(&self) -> Option<&T>; - fn peek_mut(&mut self) -> Option<&mut T>; - - fn clear(&mut self); - fn len(&self) -> usize; - fn is_empty(&self) -> bool { - self.len() == 0 - } -} - -impl Stack for Vec { - fn push(&mut self, value: T) { - self.push(value) - } - - fn pop(&mut self) -> Option { - self.pop() - } - - fn peek(&self) -> Option<&T> { - self.last() - } - - fn peek_mut(&mut self) -> Option<&mut T> { - self.last_mut() - } - - fn clear(&mut self) { - self.clear() - } - - fn len(&self) -> usize { - self.len() - } -} - -impl<'a, T: Debug> Stack for BumpVec<'a, T> { - fn push(&mut self, value: T) { - self.push(value) - } - - fn pop(&mut self) -> Option { - self.pop() - } - - fn peek(&self) -> Option<&T> { - self.last() - } - - fn peek_mut(&mut self) -> Option<&mut T> { - self.last_mut() - } - - fn clear(&mut self) { - self.clear() - } - - fn len(&self) -> usize { - self.len() - } -} diff --git a/src/lazy/expanded/struct.rs b/src/lazy/expanded/struct.rs index a544e818..9c62eb02 100644 --- a/src/lazy/expanded/struct.rs +++ b/src/lazy/expanded/struct.rs @@ -1,28 +1,30 @@ use std::ops::ControlFlow; -use crate::lazy::decoder::{LazyDecoder, LazyRawStruct, RawFieldExpr, RawValueExpr}; -use crate::lazy::expanded::macro_evaluator::{ - MacroEvaluator, MacroExpansion, MacroInvocation, TransientEExpEvaluator, - TransientTdlMacroEvaluator, +use crate::element::iterators::SymbolsIterator; +use crate::lazy::decoder::{ + LazyDecoder, LazyRawFieldExpr, LazyRawStruct, RawFieldExpr, RawValueExpr, +}; +use crate::lazy::expanded::macro_evaluator::{MacroEvaluator, MacroExpr, RawEExpression}; +use crate::lazy::expanded::sequence::Environment; +use crate::lazy::expanded::template::{ + AnnotationsRange, ExprRange, TemplateMacroRef, TemplateStructRawFieldsIterator, }; -use crate::lazy::expanded::stack::Stack; -use crate::lazy::expanded::template::TemplateStructRawFieldsIterator; use crate::lazy::expanded::{ EncodingContext, ExpandedAnnotationsIterator, ExpandedAnnotationsSource, ExpandedValueRef, ExpandedValueSource, LazyExpandedValue, }; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::result::IonFailure; -use crate::{Annotations, IonError, IonResult, RawSymbolTokenRef, Struct}; +use crate::{IonError, IonResult, RawSymbolTokenRef}; #[derive(Debug, Clone)] -pub struct LazyExpandedField<'top, 'data, D: LazyDecoder<'data>> { +pub struct LazyExpandedField<'top, D: LazyDecoder> { name: RawSymbolTokenRef<'top>, - pub(crate) value: LazyExpandedValue<'top, 'data, D>, + pub(crate) value: LazyExpandedValue<'top, D>, } -impl<'top, 'data, D: LazyDecoder<'data>> LazyExpandedField<'top, 'data, D> { - pub fn new(name: RawSymbolTokenRef<'top>, value: LazyExpandedValue<'top, 'data, D>) -> Self { +impl<'top, D: LazyDecoder> LazyExpandedField<'top, D> { + pub fn new(name: RawSymbolTokenRef<'top>, value: LazyExpandedValue<'top, D>) -> Self { Self { name, value } } @@ -30,69 +32,86 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyExpandedField<'top, 'data, D> { self.name.clone() } - pub fn value(&self) -> &LazyExpandedValue<'top, 'data, D> { + pub fn value(&self) -> &LazyExpandedValue<'top, D> { &self.value } } #[derive(Clone)] -pub enum ExpandedStructSource<'top, 'data, D: LazyDecoder<'data>> { - ValueLiteral(D::Struct), - Template(&'top Annotations, &'top Struct), +pub enum ExpandedStructSource<'top, D: LazyDecoder> { + ValueLiteral(D::Struct<'top>), + Template( + Environment<'top, D>, + TemplateMacroRef<'top>, + AnnotationsRange, + ExprRange, + ), // TODO: Constructed } #[derive(Clone)] -pub struct LazyExpandedStruct<'top, 'data, D: LazyDecoder<'data>> { +pub struct LazyExpandedStruct<'top, D: LazyDecoder> { pub(crate) context: EncodingContext<'top>, - pub(crate) source: ExpandedStructSource<'top, 'data, D>, + pub(crate) source: ExpandedStructSource<'top, D>, } -impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyExpandedStruct<'top, 'data, D> { +impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { pub fn from_literal( context: EncodingContext<'top>, - sexp: D::Struct, - ) -> LazyExpandedStruct<'top, 'data, D> { + sexp: D::Struct<'top>, + ) -> LazyExpandedStruct<'top, D> { let source = ExpandedStructSource::ValueLiteral(sexp); Self { source, context } } pub fn from_template( context: EncodingContext<'top>, - annotations: &'top Annotations, - struct_: &'top Struct, - ) -> LazyExpandedStruct<'top, 'data, D> { - let source = ExpandedStructSource::Template(annotations, struct_); + environment: Environment<'top, D>, + template: TemplateMacroRef<'top>, + annotations: AnnotationsRange, + expressions: ExprRange, + ) -> LazyExpandedStruct<'top, D> { + let source = + ExpandedStructSource::Template(environment, template, annotations, expressions); Self { source, context } } - pub fn annotations(&self) -> ExpandedAnnotationsIterator<'top, 'data, D> { - match self.source { + pub fn annotations(&self) -> ExpandedAnnotationsIterator<'top, D> { + match &self.source { ExpandedStructSource::ValueLiteral(value) => ExpandedAnnotationsIterator { source: ExpandedAnnotationsSource::ValueLiteral(value.annotations()), }, - ExpandedStructSource::Template(annotations, _struct) => ExpandedAnnotationsIterator { - source: ExpandedAnnotationsSource::Template(annotations.iter()), - }, + ExpandedStructSource::Template(_environment, template, annotations, _expressions) => { + let annotations = template + .body + .annotations_storage() + .get(annotations.ops_range()) + .unwrap(); + ExpandedAnnotationsIterator { + source: ExpandedAnnotationsSource::Template(SymbolsIterator::new(annotations)), + } + } } } - pub fn iter(&self) -> ExpandedStructIterator<'top, 'data, D> { - let source = match self.source { + pub fn iter(&self) -> ExpandedStructIterator<'top, D> { + let source = match &self.source { ExpandedStructSource::ValueLiteral(raw_struct) => { ExpandedStructIteratorSource::ValueLiteral( - MacroEvaluator::< - D, - >::MacroInvocation, - bumpalo::collections::Vec<'top, _>, - >::new_transient(self.context), + MacroEvaluator::new(self.context, Environment::empty()), raw_struct.iter(), ) } - ExpandedStructSource::Template(_annotations, struct_) => { + ExpandedStructSource::Template(environment, template, _annotations, expressions) => { + let evaluator = MacroEvaluator::new(self.context, *environment); ExpandedStructIteratorSource::Template( - TransientTdlMacroEvaluator::new_transient(self.context), - TemplateStructRawFieldsIterator::new(struct_), + evaluator, + TemplateStructRawFieldsIterator::new( + self.context, + *environment, + *template, + &template.body.expressions[expressions.ops_range()], + ), ) } }; @@ -103,25 +122,32 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyExpandedStruct<'top, 'data, D } } - pub fn bump_iter(&self) -> &'top mut ExpandedStructIterator<'top, 'data, D> { + fn environment(&self) -> Environment<'top, D> { + match &self.source { + ExpandedStructSource::ValueLiteral(_) => Environment::empty(), + ExpandedStructSource::Template(environment, _, _, _) => *environment, + } + } + + pub fn bump_iter(&self) -> &'top mut ExpandedStructIterator<'top, D> { self.context.allocator.alloc_with(|| self.iter()) } - pub fn find(&self, name: &str) -> IonResult>> { + pub fn find(&self, name: &str) -> IonResult>> { for field_result in self.iter() { let field = field_result?; if field.name() == name.as_raw_symbol_token_ref() { - return Ok(Some(field.value().clone())); + return Ok(Some(*field.value())); } } Ok(None) } - pub fn get(&self, name: &str) -> IonResult>> { + pub fn get(&self, name: &str) -> IonResult>> { self.find(name)?.map(|f| f.read()).transpose() } - pub fn get_expected(&self, name: &str) -> IonResult> { + pub fn get_expected(&self, name: &str) -> IonResult> { if let Some(value) = self.get(name)? { Ok(value) } else { @@ -130,34 +156,34 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyExpandedStruct<'top, 'data, D } } -pub enum ExpandedStructIteratorSource<'top, 'data, D: LazyDecoder<'data>> { +pub enum ExpandedStructIteratorSource<'top, D: LazyDecoder> { // The struct we're iterating over is a literal in the data stream. It may contain // e-expressions that need to be evaluated. ValueLiteral( // Giving the struct iterator its own evaluator means that we can abandon the iterator // at any time without impacting the evaluation state of its parent container. - TransientEExpEvaluator<'top, 'data, D>, - >::Iterator, + MacroEvaluator<'top, D>, + as LazyRawStruct<'top, D>>::Iterator, ), // The struct we're iterating over is a value in a TDL template. It may contain macro // invocations that need to be evaluated. Template( - TransientTdlMacroEvaluator<'top, 'data, D>, - TemplateStructRawFieldsIterator<'top>, + MacroEvaluator<'top, D>, + TemplateStructRawFieldsIterator<'top, D>, ), // TODO: Constructed } -pub struct ExpandedStructIterator<'top, 'data, D: LazyDecoder<'data>> { +pub struct ExpandedStructIterator<'top, D: LazyDecoder> { context: EncodingContext<'top>, - source: ExpandedStructIteratorSource<'top, 'data, D>, + source: ExpandedStructIteratorSource<'top, D>, // Stores information about any operations that are still in progress. - state: ExpandedStructIteratorState<'top, 'data, D>, + state: ExpandedStructIteratorState<'top, D>, } /// Ion 1.1's struct is very versatile, and supports a variety of expansion operations. This /// types indicates which operation is in the process of being carried out. -enum ExpandedStructIteratorState<'top, 'data: 'top, D: LazyDecoder<'data>> { +enum ExpandedStructIteratorState<'top, D: LazyDecoder> { // The iterator is not performing any operations. It is ready to pull the next field from its // source. ReadingFieldFromSource, @@ -179,13 +205,13 @@ enum ExpandedStructIteratorState<'top, 'data: 'top, D: LazyDecoder<'data>> { // This variant holds a pointer to that struct's iterator living in the // EncodingContext's bump allocator. InliningAStruct( - LazyExpandedStruct<'top, 'data, D>, - &'top mut ExpandedStructIterator<'top, 'data, D>, + LazyExpandedStruct<'top, D>, + &'top mut ExpandedStructIterator<'top, D>, ), } -impl<'top, 'data: 'top, D: LazyDecoder<'data>> Iterator for ExpandedStructIterator<'top, 'data, D> { - type Item = IonResult>; +impl<'top, D: LazyDecoder> Iterator for ExpandedStructIterator<'top, D> { + type Item = IonResult>; fn next(&mut self) -> Option { let Self { @@ -197,8 +223,26 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> Iterator for ExpandedStructIterat ExpandedStructIteratorSource::Template(tdl_macro_evaluator, template_iterator) => { Self::next_field_from(context, state, tdl_macro_evaluator, template_iterator) } - ExpandedStructIteratorSource::ValueLiteral(e_exp_evaluator, iter) => { - Self::next_field_from(context, state, e_exp_evaluator, iter) + ExpandedStructIteratorSource::ValueLiteral(e_exp_evaluator, raw_struct_iter) => { + let mut iter_adapter = raw_struct_iter.map( + |field: IonResult>| match field? { + RawFieldExpr::NameValuePair(name, RawValueExpr::MacroInvocation(m)) => { + let resolved_invocation = m.resolve(context)?; + Ok(RawFieldExpr::NameValuePair( + name, + RawValueExpr::MacroInvocation(resolved_invocation.into()), + )) + } + RawFieldExpr::NameValuePair(name, RawValueExpr::ValueLiteral(value)) => Ok( + RawFieldExpr::NameValuePair(name, RawValueExpr::ValueLiteral(value)), + ), + RawFieldExpr::MacroInvocation(invocation) => { + let resolved_invocation = invocation.resolve(context)?; + Ok(RawFieldExpr::MacroInvocation(resolved_invocation.into())) + } + }, + ); + Self::next_field_from(context, state, e_exp_evaluator, &mut iter_adapter) } } } @@ -211,7 +255,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> Iterator for ExpandedStructIterat // 'top: The lifetime associated with the top-level value we're currently reading at some depth. // 'data: The lifetime associated with the byte array containing the Ion we're reading from. // D: The decoder being used to read the Ion data stream. For example: `TextEncoding_1_1` -impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedStructIterator<'top, 'data, D> { +impl<'top, D: LazyDecoder> ExpandedStructIterator<'top, D> { /// Pulls the next expanded field from the raw source struct. The field returned may correspond /// to a `(name, value literal)` pair in the raw struct, or it may be the product of a macro /// evaluation. @@ -221,28 +265,19 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedStructIterator<'top, 'dat // The lifetime of the field name that we return; it needs to live at least as long as // `top -- the amount of time that the reader will be parked on this top level value. 'name: 'top, - // The syntactic element that represents a macro invocation in this context. For - // example: a `RawTextMacroInvocation` when reading text Ion 1.1 or a `&'top Sequence` when - // evaluating a TDL macro. - M: MacroInvocation<'data, D> + 'top, // We have an iterator (see `I` below) that gives us raw fields from an input struct. // This type, `V`, is the type of value in that raw field. For example: `LazyRawTextValue_1_1` // when reading text Ion 1.1, or `&'top Element` when evaluating a TDL macro. - V: Into>, - // The type of backing storage used by our macro evaluator. If struct we're iterating over is - // at the top level of the data stream, the evaluator will use a `Vec` for its stack to have - // storage that can persist across top level values. If this is a nested struct or part of - // a template, this will be a transient `BumpVec` with a lifetime tied to the top level. - S: Stack>, + V: Into>, // An iterator over the struct we're expanding. It may be the fields iterator from a // LazyRawStruct, or it could be a `TemplateStructRawFieldsIterator`. - I: Iterator>>, + I: Iterator>>>, >( context: EncodingContext<'top>, - state: &'a mut ExpandedStructIteratorState<'top, 'data, D>, - evaluator: &'a mut MacroEvaluator<'data, D, M, S>, + state: &'a mut ExpandedStructIteratorState<'top, D>, + evaluator: &'a mut MacroEvaluator<'top, D>, iter: &'a mut I, - ) -> Option>> { + ) -> Option>> { // This method begins by pulling raw field expressions from the source iterator. // If the expression is a (name, value literal) pair, we can wrap it in an LazyExpandedField // and return it immediately. However, if it is a (name, macro) pair or (macro) expression, @@ -281,7 +316,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedStructIterator<'top, 'dat // macro in field value position, emitting (name, value) pairs for each value // in the expansion, one at a time. ExpandingValueExpr(field_name) => { - match evaluator.next(context, 0) { + match evaluator.next(context) { Err(e) => return Some(Err(e)), Ok(Some(next_value)) => { // We got another value from the macro we're evaluating. Emit @@ -308,16 +343,14 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedStructIterator<'top, 'dat // These generics are all carried over from the function above. 'a, 'name: 'top, - M: MacroInvocation<'data, D> + 'top, - V: Into>, - S: Stack>, - I: Iterator>>, + V: Into>, + I: Iterator>>>, >( context: EncodingContext<'top>, - state: &mut ExpandedStructIteratorState<'top, 'data, D>, - evaluator: &mut MacroEvaluator<'data, D, M, S>, + state: &mut ExpandedStructIteratorState<'top, D>, + evaluator: &mut MacroEvaluator<'top, D>, iter: &mut I, - ) -> ControlFlow>>> { + ) -> ControlFlow>>> { // Because this helper function is always being invoked from within a loop, it uses // the `ControlFlow` enum to signal whether its return value should cause the loop to // terminate (`ControlFlow::Break`) or continue (`ControlFlow::Continue`). @@ -330,7 +363,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedStructIterator<'top, 'dat }; return match field_expr_result { - Err(e) => Break(Some(Err::, IonError>(e))), + Err(e) => Break(Some(Err::, IonError>(e))), // Plain (name, value literal) pair. For example: `foo: 1` Ok(RawFieldExpr::NameValuePair(name, RawValueExpr::ValueLiteral(value))) => { Break(Some(Ok(LazyExpandedField::new( @@ -372,16 +405,11 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedStructIterator<'top, 'dat /// Pulls the next value from the evaluator, confirms that it's a struct, and then switches /// the iterator state to `InliningAStruct` so it can begin merging its fields. - fn begin_inlining_struct_from_macro< - 'a, - 'name: 'top, - M: MacroInvocation<'data, D> + 'top, - S: Stack>, - >( + fn begin_inlining_struct_from_macro<'a, 'name: 'top>( context: EncodingContext<'top>, - state: &mut ExpandedStructIteratorState<'top, 'data, D>, - evaluator: &mut MacroEvaluator<'data, D, M, S>, - invocation: M, + state: &mut ExpandedStructIteratorState<'top, D>, + evaluator: &mut MacroEvaluator<'top, D>, + invocation: MacroExpr<'top, D>, ) -> IonResult<()> { let mut evaluation = evaluator.evaluate(context, invocation)?; let expanded_value = match evaluation.next() { @@ -398,7 +426,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> ExpandedStructIterator<'top, 'dat )) } }; - let iter: &'top mut ExpandedStructIterator<'top, 'data, D> = struct_.bump_iter(); + let iter: &'top mut ExpandedStructIterator<'top, D> = struct_.bump_iter(); *state = ExpandedStructIteratorState::InliningAStruct(struct_, iter); Ok(()) } diff --git a/src/lazy/expanded/tdl_macro.rs b/src/lazy/expanded/tdl_macro.rs deleted file mode 100644 index e493327e..00000000 --- a/src/lazy/expanded/tdl_macro.rs +++ /dev/null @@ -1,96 +0,0 @@ -//! Types and traits representing a macro invocation within a TDL (Template Definition -//! Language) expression. - -use crate::element::iterators::SequenceIterator; -use crate::lazy::decoder::LazyDecoder; -use crate::lazy::expanded::macro_evaluator::{ArgumentKind, MacroInvocation, ToArgumentKind}; -use crate::lazy::text::raw::v1_1::reader::MacroIdRef; - -use crate::lazy::expanded::{EncodingContext, ExpandedValueSource, LazyExpandedValue}; -use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; -use crate::{Element, IonResult, Sequence, Value}; - -impl<'top, 'data, D: LazyDecoder<'data>> MacroInvocation<'data, D> for &'top Sequence { - type ArgumentExpr = &'top Element; - type ArgumentsIterator = OkAdapter>; - - // TODO: This dummy implementation using `&'top Sequence` will be replaced by a purpose-built - // type that validates the invocation before reaching this method. For now, this method can - // panic if the input is malformed. - fn id(&self) -> MacroIdRef { - match self.get(0).expect("TDL macro call missing ID").value() { - Value::Int(address) => MacroIdRef::LocalAddress( - usize::try_from(address.expect_i64().unwrap()) - .expect("macro address int out of bounds for usize"), - ), - Value::Symbol(name) => { - MacroIdRef::LocalName(name.text().expect("cannot use $0 as macro name")) - } - _ => panic!("macro IDs must be an int or symbol"), - } - } - - fn arguments(&self) -> Self::ArgumentsIterator { - let mut children = self.elements(); - let _id = children.next().unwrap(); - OkAdapter { iterator: children } - } -} - -/// Wraps an infallible iterator's output items in `Result::Ok`. -pub struct OkAdapter -where - I: Iterator, -{ - iterator: I, -} - -impl OkAdapter -where - I: Iterator, -{ - pub fn new(iterator: I) -> Self { - Self { iterator } - } -} - -impl Iterator for OkAdapter { - type Item = IonResult<::Item>; - - fn next(&mut self) -> Option { - self.iterator.next().map(Ok) - } -} - -// When an `&Element` appears in macro argument position within a template, this trait implementation -// recognizes whether the `&Element` represents a value, a variable, or another template invocation. -impl<'element, 'data, D: LazyDecoder<'data>> ToArgumentKind<'data, D, &'element Sequence> - for &'element Element -{ - fn to_arg_expr<'top>( - self, - context: EncodingContext<'top>, - ) -> ArgumentKind<'top, 'data, D, &'element Sequence> - where - 'element: 'top, - { - // In this implementation, we are reading the arguments to a template macro invocation. - // For example: - // - // (macro twice (a) - // // Inside a template definition, calling the `values` macro with two arguments - // (values a a) - // ) - // In this context, there are named variables to consider. If we encounter a symbol like `a` - // in argument position, we must flag it as a variable so the caller has the opportunity to - // resolve it to a value stream. - match self.value() { - Value::SExp(sequence) => ArgumentKind::MacroInvocation(sequence), - Value::Symbol(variable) => ArgumentKind::Variable(variable.as_raw_symbol_token_ref()), - _ => ArgumentKind::ValueLiteral(LazyExpandedValue { - context, - source: ExpandedValueSource::Template(self), - }), - } - } -} diff --git a/src/lazy/expanded/template.rs b/src/lazy/expanded/template.rs index 0e06a40d..43a2966d 100644 --- a/src/lazy/expanded/template.rs +++ b/src/lazy/expanded/template.rs @@ -1,101 +1,986 @@ +use std::fmt; +use std::fmt::{Debug, Formatter}; +use std::ops::{Deref, Range}; + use crate::lazy::decoder::{LazyDecoder, RawFieldExpr, RawValueExpr}; -use crate::lazy::expanded::macro_evaluator::TransientTdlMacroEvaluator; -use crate::lazy::expanded::{EncodingContext, ExpandedValueSource, LazyExpandedValue}; -use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; -use crate::{Element, IonResult, Sequence, Struct, Value}; +use crate::lazy::expanded::macro_evaluator::{MacroEvaluator, MacroExpr, ValueExpr}; +use crate::lazy::expanded::macro_table::MacroRef; +use crate::lazy::expanded::sequence::Environment; +use crate::lazy::expanded::{ + EncodingContext, ExpandedValueRef, ExpandedValueSource, LazyExpandedValue, +}; +use crate::lazy::text::raw::v1_1::reader::{MacroAddress, MacroIdRef}; +use crate::result::IonFailure; +use crate::{Bytes, Decimal, Int, IonResult, IonType, Str, Symbol, Timestamp, Value}; + +/// A parameter in a user-defined macro's signature. +#[derive(Debug, Clone)] +pub struct Parameter { + name: String, + encoding: ParameterEncoding, + // TODO: Grouping +} + +impl Parameter { + pub fn new(name: String, encoding: ParameterEncoding) -> Self { + Self { name, encoding } + } -pub type TdlMacroInvocation<'top> = &'top Element; + pub fn name(&self) -> &str { + self.name.as_str() + } + pub fn encoding(&self) -> &ParameterEncoding { + &self.encoding + } +} -pub struct TemplateSequenceIterator<'top, 'data, D: LazyDecoder<'data>> { +/// The encoding used to serialize and deserialize the associated parameter. +#[derive(Debug, Clone)] +pub enum ParameterEncoding { + /// A 'tagged' type is one whose binary encoding begins with an opcode (sometimes called a 'tag'.) + Tagged, + // TODO: tagless types, including fixed-width types and macros +} + +/// The sequence of parameters for which callers must pass expressions when invoking the macro. +#[derive(Debug, Clone)] +pub struct MacroSignature { + parameters: Vec, +} + +impl MacroSignature { + fn with_parameter(mut self, name: impl Into, encoding: ParameterEncoding) -> Self { + self.parameters.push(Parameter { + name: name.into(), + encoding, + }); + self + } + + pub fn parameters(&self) -> &[Parameter] { + &self.parameters + } + pub fn new(parameters: Vec) -> Self { + Self { parameters } + } +} + +/// A user-defined macro which expands the parameters in the signature into a series of Ion values +/// according to a template. +/// +/// Macros can be made anonymous by passing `null` in the definition's name position. +/// ```ion_1_1 +/// (macro null (x y z) [x, y, z]) +/// ``` +/// This simplifies the use of machine-authored macros, which are always invoked by their address +/// in the macro table rather than by a human-friendly name. +#[derive(Clone)] +pub struct TemplateMacro { + pub(crate) name: Option, + pub(crate) signature: MacroSignature, + pub(crate) body: TemplateBody, +} + +impl Debug for TemplateMacro { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + writeln!(f, "Template {}", self.name())?; + writeln!(f, " signature:")?; + // Writes each parameter in the signature on its own indented line + for param in self.signature().parameters() { + let name = param.name(); + let encoding = param.encoding(); + writeln!(f, " {name} ({encoding:?})")?; + } + writeln!(f, " body:")?; + let indentation = &mut String::from(" "); + let mut index = 0usize; + while let Some(expr) = self.body().expressions().get(index) { + index += TemplateBodyValueExpr::fmt_expr(f, indentation, self, expr)?; + } + + Ok(()) + } +} + +impl TemplateMacro { + pub fn name(&self) -> &str { + self.name.as_deref().unwrap_or("") + } + pub fn signature(&self) -> &MacroSignature { + &self.signature + } + pub fn body(&self) -> &TemplateBody { + &self.body + } +} + +/// A reference to a template macro definition paired with the macro table address at which it was found. +#[derive(Copy, Clone, Debug)] +pub struct TemplateMacroRef<'top> { + // This field is only stored as a source of information for debugging. (For example, when showing + // a macro evaluator stack trace.) + address: MacroAddress, + template: &'top TemplateMacro, +} + +impl<'top> TemplateMacroRef<'top> { + pub fn new(address: MacroAddress, template: &'top TemplateMacro) -> Self { + Self { address, template } + } + pub fn address(&self) -> MacroAddress { + self.address + } +} + +impl<'top> Deref for TemplateMacroRef<'top> { + type Target = &'top TemplateMacro; + + fn deref(&self) -> &Self::Target { + &self.template + } +} + +/// Steps over the child expressions of a list or s-expression found in the body of a template. +pub struct TemplateSequenceIterator<'top, D: LazyDecoder> { context: EncodingContext<'top>, - evaluator: TransientTdlMacroEvaluator<'top, 'data, D>, - // The list element over which we're iterating - sequence: &'top Sequence, + template: TemplateMacroRef<'top>, + evaluator: MacroEvaluator<'top, D>, + value_expressions: &'top [TemplateBodyValueExpr], index: usize, } -impl<'top, 'data, D: LazyDecoder<'data>> TemplateSequenceIterator<'top, 'data, D> { +impl<'top, D: LazyDecoder> TemplateSequenceIterator<'top, D> { pub fn new( context: EncodingContext<'top>, - evaluator: TransientTdlMacroEvaluator<'top, 'data, D>, - sequence: &'top Sequence, + evaluator: MacroEvaluator<'top, D>, + template: TemplateMacroRef<'top>, + value_expressions: &'top [TemplateBodyValueExpr], ) -> Self { Self { - sequence, - index: 0, context, + template, evaluator, + value_expressions, + index: 0, } } } -impl<'top, 'data, D: LazyDecoder<'data>> Iterator for TemplateSequenceIterator<'top, 'data, D> { - type Item = IonResult>; +impl<'top, D: LazyDecoder> Iterator for TemplateSequenceIterator<'top, D> { + type Item = IonResult>; fn next(&mut self) -> Option { loop { // If the evaluator's stack is not empty, give it the opportunity to yield a value. - if self.evaluator.stack_depth() > 0 { - match self.evaluator.next(self.context, 0).transpose() { + if self.evaluator.macro_stack_depth() > 0 { + match self.evaluator.next(self.context).transpose() { Some(value) => return Some(value), None => { // The stack did not produce values and is empty, pull - // the next expression from `self.sequence` + // the next expression from `self.value_expressions` } } } - // We didn't get a value from the evaluator, so pull the next expression from the - // sequence. - let element = self.sequence.get(self.index)?; + // We didn't get a value from the evaluator, so pull the next expansion step. + let step = self.value_expressions.get(self.index)?; self.index += 1; - // If the expression is an s-expression... - if let Value::SExp(sequence) = element.value() { - // ...it's a TDL macro invocation. Push it onto the evaluator's stack and return - // to the top of the loop. - match self.evaluator.push(self.context, sequence) { - Ok(_) => continue, - Err(e) => return Some(Err(e)), + return match step { + TemplateBodyValueExpr::Element(element) => { + let value = LazyExpandedValue { + context: self.context, + source: ExpandedValueSource::Template( + self.evaluator.environment(), + TemplateElement::new(self.template, element), + ), + }; + Some(Ok(value)) } - } - // Otherwise, it's our next value. - return Some(Ok(LazyExpandedValue { - context: self.context, - source: ExpandedValueSource::Template(element), - })); + TemplateBodyValueExpr::MacroInvocation(body_invocation) => { + // ...it's a TDL macro invocation. Push it onto the evaluator's stack and return + // to the top of the loop. + let invoked_macro = self + .context + .macro_table + .macro_at_address(body_invocation.invoked_macro_address) + .unwrap(); + let invocation = TemplateMacroInvocation::new( + self.context, + self.template, + invoked_macro, + self.template + .body + .expressions() + .get(body_invocation.arg_expr_range().ops_range()) + .unwrap(), + ); + self.index += invocation.arg_expressions.len(); + match self.evaluator.push(self.context, invocation) { + Ok(_) => continue, + Err(e) => Some(Err(e)), + } + } + TemplateBodyValueExpr::Variable(variable_ref) => { + let arg_expr = self + .evaluator + .environment() + .expressions() + .get(variable_ref.signature_index()) + .unwrap(); + match arg_expr { + ValueExpr::ValueLiteral(value) => Some(Ok(*value)), + ValueExpr::MacroInvocation(invocation) => { + match self.evaluator.push(self.context, *invocation) { + Ok(_) => continue, + Err(e) => Some(Err(e)), + } + } + } + } + }; } } } -// An iterator that pulls values from a template body and wraps them in a `RawFieldExpr` to -// mimic reading them from input. The LazyExpandedStruct handles evaluating any macros that this -// yields. -pub struct TemplateStructRawFieldsIterator<'top> { - // The struct element over whose fields we're iterating - struct_: &'top Struct, +/// An iterator that pulls expressions from a template body and wraps them in a [`RawFieldExpr`] to +/// mimic reading them from input. The [`LazyExpandedStruct`](crate::lazy::expanded::struct) handles +/// evaluating any macro invocations that this yields. +pub struct TemplateStructRawFieldsIterator<'top, D: LazyDecoder> { + context: EncodingContext<'top>, + environment: Environment<'top, D>, + template: TemplateMacroRef<'top>, + expressions: &'top [TemplateBodyValueExpr], index: usize, } -impl<'top> TemplateStructRawFieldsIterator<'top> { - pub fn new(struct_: &'top Struct) -> Self { - Self { struct_, index: 0 } +impl<'top, D: LazyDecoder> TemplateStructRawFieldsIterator<'top, D> { + pub fn new( + context: EncodingContext<'top>, + environment: Environment<'top, D>, + template: TemplateMacroRef<'top>, + expressions: &'top [TemplateBodyValueExpr], + ) -> Self { + Self { + context, + environment, + template, + expressions, + index: 0, + } } } -impl<'top> Iterator for TemplateStructRawFieldsIterator<'top> { - type Item = IonResult>; +impl<'top, D: LazyDecoder> Iterator for TemplateStructRawFieldsIterator<'top, D> { + type Item = IonResult, MacroExpr<'top, D>>>; fn next(&mut self) -> Option { - if let Some((name, element)) = self.struct_.get_index(self.index) { - self.index += 1; - let name = name.as_raw_symbol_token_ref(); - let value = if let Value::SExp(sequence) = element.value() { - RawValueExpr::MacroInvocation(sequence) - } else { - RawValueExpr::ValueLiteral(element) + let name_expr_address = self.index; + let name_element = self + .expressions + .get(name_expr_address)? + .expect_element() + .expect("field name must be a literal"); + let name_value = LazyExpandedValue::::from_template( + self.context, + // because the name token must be a literal, the env is irrelevant + Environment::empty(), + TemplateElement::new(self.template, name_element), + ); + let name_token = match name_value.read() { + Ok(ExpandedValueRef::Symbol(token)) => token, + Ok(ExpandedValueRef::String(str_ref)) => str_ref.into(), + Ok(_) => { + return Some(IonResult::decoding_error( + "template struct had a non-text field name", + )) + } + Err(e) => return Some(Err(e)), + }; + let value_expr_address = name_expr_address + 1; + let value_source = match self.expressions.get(value_expr_address) { + None => { + return Some(IonResult::decoding_error( + "template struct had field name with no value", + )) + } + Some(TemplateBodyValueExpr::Element(element)) => { + match element.value() { + TemplateValue::List(range) + | TemplateValue::SExp(range) + | TemplateValue::Struct(range) => self.index += range.len(), + _ => { + // Otherwise, the value is a scalar and is exactly one expression. We already + // accounted for the first expression, so there's nothing else to do here. + } + }; + RawValueExpr::ValueLiteral(ExpandedValueSource::Template( + self.environment, + TemplateElement::new(self.template, element), + )) + } + Some(TemplateBodyValueExpr::MacroInvocation(body_invocation)) => { + let invoked_macro = self + .context + .macro_table + .macro_at_address(body_invocation.invoked_macro_address) + .unwrap(); + let invocation = TemplateMacroInvocation::new( + self.context, + self.template, + invoked_macro, + self.template + .body + .expressions() + .get(body_invocation.arg_expr_range().ops_range()) + .unwrap(), + ); + self.index += invocation.arg_expressions.len(); + RawValueExpr::MacroInvocation(MacroExpr::TemplateMacro(invocation)) + } + Some(TemplateBodyValueExpr::Variable(variable)) => { + let arg_expr = match self.environment.get_expected(variable.signature_index()) { + Ok(expr) => expr, + Err(e) => return Some(Err(e)), + }; + match arg_expr { + ValueExpr::ValueLiteral(expansion) => { + RawValueExpr::ValueLiteral(expansion.source) + } + ValueExpr::MacroInvocation(invocation) => { + RawValueExpr::MacroInvocation(*invocation) + } + } + } + }; + self.index += 2; + Some(Ok(RawFieldExpr::NameValuePair(name_token, value_source))) + } +} + +/// Stores a sequence of expansion steps that need to be evaluated in turn. +/// +/// See [`TemplateBodyValueExpr`] for details. +#[derive(Debug, Clone, PartialEq)] +pub struct TemplateBody { + pub(crate) expressions: Vec, + // All of the elements stored in the Vec above share the Vec below for storing their annotations. + // This allows us to avoid allocating a `Vec` for every value in the template, saving + // a small amount of time and memory during compilation. Each values hold an index range + // into this `Vec`. + pub(crate) annotations_storage: Vec, +} + +impl TemplateBody { + pub fn expressions(&self) -> &[TemplateBodyValueExpr] { + &self.expressions + } + pub fn annotations_storage(&self) -> &[Symbol] { + &self.annotations_storage + } + + pub fn push_element(&mut self, element: TemplateBodyElement) { + self.expressions + .push(TemplateBodyValueExpr::Element(element)) + } + + pub fn push_variable(&mut self, signature_index: usize) { + self.expressions.push(TemplateBodyValueExpr::Variable( + TemplateBodyVariableReference::new(signature_index), + )) + } + + pub fn push_macro_invocation(&mut self, invoked_macro_address: usize, expr_range: ExprRange) { + self.expressions + .push(TemplateBodyValueExpr::MacroInvocation( + TemplateBodyMacroInvocation::new(invoked_macro_address, expr_range), + )) + } +} + +/// An expression appearing in value position in a template body. +#[derive(Debug, Clone, PartialEq)] +pub enum TemplateBodyValueExpr { + /// A potentially annotated value literal. + Element(TemplateBodyElement), + /// A reference to a variable that needs to be expanded. + Variable(TemplateBodyVariableReference), + /// A macro invocation that needs to be expanded. + MacroInvocation(TemplateBodyMacroInvocation), +} + +impl TemplateBodyValueExpr { + /// Returns `Ok(&element)` if this expression is an annotated value. Otherwise, returns + /// `Err(IonError)`. + pub fn expect_element(&self) -> IonResult<&TemplateBodyElement> { + match self { + TemplateBodyValueExpr::Element(e) => Ok(e), + TemplateBodyValueExpr::Variable(variable_reference) => { + let index = variable_reference.signature_index(); + IonResult::decoding_error(format!( + "expected an element, found reference variable with signature index '{index}'" + )) + } + TemplateBodyValueExpr::MacroInvocation(invocation) => { + let address = invocation.macro_address(); + IonResult::decoding_error(format!( + "expected an element, found macro at address {address}" + )) + } + } + } + + /// This helper method is invoked by the `Debug` implementation of `TemplateMacro`, which provides + /// a neatly indented, recursive printout of the compiled form of a template definition. + /// + /// `TemplateBodyValueExpr` also provides its own "shallow" implementation of `Debug` that simply + /// prints the contents of each field in the data of its variant. + pub(crate) fn fmt_expr( + f: &mut Formatter<'_>, + indentation: &mut String, + host_template: &TemplateMacro, + expr: &TemplateBodyValueExpr, + ) -> Result { + match &expr { + TemplateBodyValueExpr::Element(e) => { + Self::fmt_element(f, indentation, host_template, e) + } + TemplateBodyValueExpr::Variable(v) => { + Self::fmt_variable(f, indentation, host_template, v) + } + TemplateBodyValueExpr::MacroInvocation(m) => { + Self::fmt_invocation(f, indentation, host_template, m) + } + } + } + + /// A helper method to recursively print the 'compiled' form of a `TemplateBodyValueExpr::Element(_)`. + /// + /// This method is transitively invoked by [`TemplateMacro`]'s `Debug` implementation. + pub(crate) fn fmt_element( + f: &mut Formatter<'_>, + indentation: &mut String, + host_template: &TemplateMacro, + element: &TemplateBodyElement, + ) -> Result { + let annotations_range = element.annotations_range.ops_range(); + let annotations = host_template + .body() + .annotations_storage() + .get(annotations_range) + .unwrap(); + write!(f, "{indentation}")?; + for annotation in annotations { + write!(f, "{}::", annotation.text().unwrap_or("$0"))?; + } + use TemplateValue::*; + match element.value() { + List(l) => { + writeln!(f, "list")?; + return Self::fmt_sequence_body(f, indentation, host_template, *l); + } + SExp(s) => { + writeln!(f, "sexp")?; + return Self::fmt_sequence_body(f, indentation, host_template, *s); + } + Struct(s) => { + writeln!(f, "struct")?; + return Self::fmt_struct(f, indentation, host_template, *s); + } + Null(n) => writeln!(f, "{}", Value::Null(*n)), + Bool(b) => writeln!(f, "{b}"), + Int(i) => writeln!(f, "{i}"), + Float(float) => writeln!(f, "{}", *float), + Decimal(d) => writeln!(f, "{d}"), + Timestamp(t) => writeln!(f, "{t}"), + String(s) => writeln!(f, "{s}"), + Symbol(s) => writeln!(f, "{s}"), + Blob(b) => writeln!(f, "blob {:x?}", &b.as_ref()[..16]), + Clob(c) => writeln!(f, "clob {:x?}", &c.as_ref()[..16]), + }?; + Ok(1) + } + + /// A helper method to recursively print the 'compiled' form of lists, s-expressions, and + /// macro invocation argument sequences. + /// + /// This method is transitively invoked by [`TemplateMacro`]'s `Debug` implementation. + pub(crate) fn fmt_sequence_body( + f: &mut Formatter<'_>, + indentation: &mut String, + host_template: &TemplateMacro, + expr_range: ExprRange, + ) -> Result { + let range = expr_range.ops_range(); + let expressions = host_template.body().expressions().get(range).unwrap(); + indentation.push_str(" "); + let mut expr_index: usize = 0; + while expr_index < expressions.len() { + let expr = &expressions[expr_index]; + expr_index += Self::fmt_expr(f, indentation, host_template, expr)?; + } + indentation.truncate(indentation.len() - 4); + Ok(1 + expressions.len()) + } + + /// A helper method to recursively print the 'compiled' form of a struct. + /// + /// This method is transitively invoked by [`TemplateMacro`]'s `Debug` implementation. + pub(crate) fn fmt_struct( + f: &mut Formatter<'_>, + indentation: &mut String, + host_template: &TemplateMacro, + expr_range: ExprRange, + ) -> Result { + let range = expr_range.ops_range(); + let expressions = host_template.body().expressions().get(range).unwrap(); + indentation.push_str(" "); + let mut expr_index: usize = 0; + while expr_index < expressions.len() { + let TemplateBodyValueExpr::Element(name_element) = &expressions[expr_index] else { + unreachable!( + "non-element field name in template struct: {:?}", + &expressions[expr_index] + ) + }; + let name = match name_element.value() { + TemplateValue::String(s) => s.text(), + TemplateValue::Symbol(s) => s.text().unwrap_or("$0"), + unexpected => unreachable!( + "non-string, non-symbol field name in template struct: {:?}", + unexpected + ), }; - Some(Ok(RawFieldExpr::NameValuePair(name, value))) - } else { - None + let value = &expressions[expr_index + 1]; + writeln!(f, "{indentation}'{name}':")?; + indentation.push_str(" "); + expr_index += 1 + Self::fmt_expr(f, indentation, host_template, value)?; + indentation.truncate(indentation.len() - 4); } + indentation.truncate(indentation.len() - 4); + Ok(1 + expressions.len()) + } + + /// A helper method to recursively print the 'compiled' form of a macro invocation within a template. + /// + /// This method is transitively invoked by [`TemplateMacro`]'s `Debug` implementation. + pub(crate) fn fmt_invocation( + f: &mut Formatter<'_>, + indentation: &mut String, + host_template: &TemplateMacro, + invocation: &TemplateBodyMacroInvocation, + ) -> Result { + writeln!( + f, + "{indentation}", + invocation.invoked_macro_address + )?; + let args = host_template + .body + .expressions + .get(invocation.arg_expr_range.ops_range()) + .unwrap(); + + indentation.push_str(" "); + let mut expr_index: usize = 0; + while let Some(arg) = args.get(expr_index) { + expr_index += Self::fmt_expr(f, indentation, host_template, arg)?; + } + indentation.truncate(indentation.len() - 4); + Ok(1 + args.len()) + } + + /// A helper method to recursively print the 'compiled' form of a variable reference in the + /// body of a template. + /// + /// This method is transitively invoked by [`TemplateMacro`]'s `Debug` implementation. + pub(crate) fn fmt_variable( + f: &mut Formatter<'_>, + indentation: &mut String, + host_template: &TemplateMacro, + variable: &TemplateBodyVariableReference, + ) -> Result { + let index = variable.signature_index(); + let name = host_template + .signature() + .parameters() + .get(index) + .unwrap() + .name(); + writeln!(f, "{indentation}")?; + Ok(1) + } +} + +/// A macro invocation found in the body of a template. +/// +/// Because the template definition lives in the macro table (which may need to grow in the process +/// of evaluating a stream), this type holds the address of the invoked macro rather than a +/// reference to it. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct TemplateBodyMacroInvocation { + invoked_macro_address: MacroAddress, + arg_expr_range: ExprRange, +} + +impl TemplateBodyMacroInvocation { + pub fn new(invoked_macro_address: MacroAddress, arg_expr_range: ExprRange) -> Self { + Self { + invoked_macro_address, + arg_expr_range, + } + } + pub fn macro_address(&self) -> MacroAddress { + self.invoked_macro_address + } + pub fn arg_expr_range(&self) -> ExprRange { + self.arg_expr_range + } + + /// Finds the definition of the macro being invoked in the provided `context`'s macro table. + /// + /// It is a logic error for this method to be called with an [`EncodingContext`] that does not + /// contain the necessary information; doing so will cause this method to panic. + pub(crate) fn resolve<'top>( + self, + host_template: TemplateMacroRef<'top>, + context: EncodingContext<'top>, + ) -> TemplateMacroInvocation<'top> { + let invoked_macro = context + .macro_table + .macro_at_address(self.invoked_macro_address) + .unwrap(); + + let arg_expressions = host_template + .body + .expressions() + .get(self.arg_expr_range.ops_range()) + .unwrap(); + + TemplateMacroInvocation { + context, + host_template, + invoked_macro, + arg_expressions, + } + } +} + +/// A resolved version of [`TemplateBodyMacroInvocation`]; instead of holding addresses, this type +/// holds references to the invoked macro and its argument expressions. +#[derive(Copy, Clone)] +pub struct TemplateMacroInvocation<'top> { + context: EncodingContext<'top>, + // The definition of the template in which this macro invocation appears. This is useful as + // debugging information / viewing in stack traces. + host_template: TemplateMacroRef<'top>, + // The macro being invoked + invoked_macro: MacroRef<'top>, + // The range of value expressions in the host template's body that are arguments to the + // macro being invoked + arg_expressions: &'top [TemplateBodyValueExpr], +} + +impl<'top> Debug for TemplateMacroInvocation<'top> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "TemplateMacroInvocation ", + self.invoked_macro.address() + ) + } +} + +impl<'top> TemplateMacroInvocation<'top> { + pub fn new( + context: EncodingContext<'top>, + host_template: TemplateMacroRef<'top>, + invoked_macro: MacroRef<'top>, + arg_expressions: &'top [TemplateBodyValueExpr], + ) -> Self { + Self { + context, + host_template, + invoked_macro, + arg_expressions, + } + } + + pub fn id(&self) -> MacroIdRef<'top> { + MacroIdRef::LocalAddress(self.invoked_macro.address()) + } + pub fn arguments( + &self, + environment: Environment<'top, D>, + ) -> TemplateMacroInvocationArgsIterator<'top, D> { + TemplateMacroInvocationArgsIterator::new(environment, *self) + } + pub fn host_template(&self) -> TemplateMacroRef<'top> { + self.host_template + } + pub fn arg_expressions(&self) -> &'top [TemplateBodyValueExpr] { + self.arg_expressions + } + pub fn invoked_macro(&self) -> MacroRef<'top> { + self.invoked_macro + } +} + +impl<'top, D: LazyDecoder> From> for MacroExpr<'top, D> { + fn from(value: TemplateMacroInvocation<'top>) -> Self { + MacroExpr::TemplateMacro(value) + } +} + +/// Steps over the argument expressions passed to a macro invocation found in a template body. +pub struct TemplateMacroInvocationArgsIterator<'top, D: LazyDecoder> { + environment: Environment<'top, D>, + invocation: TemplateMacroInvocation<'top>, + arg_index: usize, +} + +impl<'top, D: LazyDecoder> TemplateMacroInvocationArgsIterator<'top, D> { + pub fn new( + environment: Environment<'top, D>, + invocation: TemplateMacroInvocation<'top>, + ) -> Self { + Self { + environment, + invocation, + arg_index: 0, + } + } +} + +impl<'top, D: LazyDecoder> Iterator for TemplateMacroInvocationArgsIterator<'top, D> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + let arg = self.invocation.arg_expressions().get(self.arg_index)?; + self.arg_index += 1; + let arg_expr = match arg { + TemplateBodyValueExpr::Element(e) => { + // If it's a container, skip over its contents when this iterator resumes + match e.value() { + TemplateValue::List(range) + | TemplateValue::SExp(range) + | TemplateValue::Struct(range) => { + self.arg_index += range.len(); + } + _ => { + // If it's a scalar, it has already been accounted for. + } + }; + ValueExpr::ValueLiteral(LazyExpandedValue::from_template( + self.invocation.context, + self.environment, + TemplateElement::new(self.invocation.host_template(), e), + )) + } + TemplateBodyValueExpr::Variable(variable_ref) => match self + .environment + .get_expected(variable_ref.signature_index()) + { + Ok(expr) => *expr, + Err(e) => return Some(Err(e)), + }, + TemplateBodyValueExpr::MacroInvocation(body_invocation) => { + let invocation = body_invocation + .resolve(self.invocation.host_template(), self.invocation.context); + // Skip over all of the expressions that belong to this invocation. + self.arg_index += invocation.arg_expressions.len(); + ValueExpr::MacroInvocation(invocation.into()) + } + }; + + Some(Ok(arg_expr)) + } +} + +/// A reference to a variable in a template body. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct TemplateBodyVariableReference { + signature_index: usize, +} + +impl TemplateBodyVariableReference { + pub fn new(signature_index: usize) -> Self { + Self { signature_index } + } + pub fn signature_index(&self) -> usize { + self.signature_index + } +} + +/// A value literal found in the body of a template. +/// +/// This type is similar to [`TemplateBodyElement`], but holds resolved references rather than +/// addresses. +#[derive(Clone, Copy, Debug)] +pub struct TemplateElement<'top> { + // This type holds a reference to the host template macro, which contains some shared resources + // like a `Vec` of annotation definitions. + template: TemplateMacroRef<'top>, + element: &'top TemplateBodyElement, +} + +impl<'top> TemplateElement<'top> { + pub fn new(template: TemplateMacroRef<'top>, element: &'top TemplateBodyElement) -> Self { + Self { template, element } + } + pub fn annotations(&self) -> &'top [Symbol] { + self.template + .body() + .annotations_storage() + .get(self.element.annotations_range().ops_range()) + .unwrap() + } + + pub fn annotations_range(&self) -> AnnotationsRange { + self.element.annotations_range + } + + pub fn value(&self) -> &'top TemplateValue { + &self.element.value + } + pub fn template(&self) -> TemplateMacroRef<'top> { + self.template + } +} + +/// An annotated value in a template body. +#[derive(Debug, Clone, PartialEq)] +pub struct TemplateBodyElement { + // To minimize allocations, all annotations live in a single `Vec` in the `TemplateBody`. + // Each element holds a range pointing to its annotation sequence. + pub(crate) annotations_range: AnnotationsRange, + pub(crate) value: TemplateValue, +} + +impl TemplateBodyElement { + pub fn with_value(value: TemplateValue) -> Self { + Self { + annotations_range: AnnotationsRange::empty(), + value, + } + } + pub fn with_annotations(mut self, range: Range) -> Self { + self.annotations_range = AnnotationsRange::new(range); + self + } + + pub fn value(&self) -> &TemplateValue { + &self.value + } + pub fn annotations_range(&self) -> AnnotationsRange { + self.annotations_range + } + + pub fn annotations<'a>(&self, template: &'a TemplateMacro) -> &'a [Symbol] { + template + .body() + .annotations_storage() + .get(self.annotations_range().ops_range()) + // If the annotations range is invalid, that's a bug; we cannot return an error. + .unwrap() + } +} + +/// A value literal found int he body of a template. This type is similar to [`Value`], but its +/// container types hold ranges of expression addresses rather than a materialized tree of data. +#[derive(Debug, Clone, PartialEq)] +pub enum TemplateValue { + Null(IonType), + Bool(bool), + Int(Int), + Float(f64), + Decimal(Decimal), + Timestamp(Timestamp), + Symbol(Symbol), + String(Str), + Clob(Bytes), + Blob(Bytes), + // The range of ensuing `TemplateBodyValueExpr`s that belong to this container. + List(ExprRange), + SExp(ExprRange), + Struct(ExprRange), +} + +impl TemplateValue { + pub fn is_null(&self) -> bool { + matches!(self, TemplateValue::Null(_)) + } + + pub fn ion_type(&self) -> IonType { + // TODO: Implement this with a Rust macro instead. + // See: https://github.com/amazon-ion/ion-rust/issues/650 + use TemplateValue::*; + match self { + Null(ion_type) => *ion_type, + Bool(_) => IonType::Bool, + Int(_) => IonType::Int, + Float(_) => IonType::Float, + Decimal(_) => IonType::Decimal, + Timestamp(_) => IonType::Timestamp, + Symbol(_) => IonType::Symbol, + String(_) => IonType::String, + Clob(_) => IonType::Clob, + Blob(_) => IonType::Blob, + List(_) => IonType::List, + SExp(_) => IonType::SExp, + Struct(_) => IonType::Struct, + } + } +} + +/// A slice of a [`TemplateBody`]'s sequence of `TemplateExpansionStep`. This type can be used to +/// represent containers (list, sexp, struct) or macro invocations, all of which use an evaluator +/// to iteratively evaluate a series of `TemplateExpansionStep`s. This type does not hold a reference +/// to the template definition itself. +pub type ExprRange = SmallRange; + +/// A slice of a [`TemplateBody`]'s shared `Vec` of annotation definitions. Each value in the +/// template body holds an `AnnotationsRange` that indicates which annotations in the shared +/// collections apply to it. +pub type AnnotationsRange = SmallRange; + +/// A range that takes 8 bytes instead of `Range`'s 16. This is useful for cases like +/// annotations where a capacity of 4 billion+ is more than enough. It also implements `Copy`, +/// making it possible for enclosing types to also implement `Copy`. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct SmallRange { + start: u32, + end: u32, +} + +impl SmallRange { + pub fn empty() -> Self { + Self { start: 0, end: 0 } + } + + pub fn new(range: Range) -> Self { + debug_assert!(u32::try_from(range.start).is_ok()); + debug_assert!(u32::try_from(range.end).is_ok()); + Self { + start: range.start as u32, + end: range.end as u32, + } + } + + /// Produces an equivalent [`std::ops::Range`]. + /// + /// `std::ops::Range` is twice as large as `SmallRange` on 64 bit machines and does not + /// implement the `Copy` trait. This method is a convenience that allows a `SmallRange` to be + /// used as a collection index. + // We are not able to implement `std::ops::Index` for the stdlib's collections as + // this crate owns neither the `Index` trait nor the collections themselves. + pub fn ops_range(&self) -> Range { + self.start as usize..self.end as usize + } + + pub fn len(&self) -> usize { + (self.end - self.start) as usize + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 } } diff --git a/src/lazy/lazy_value_cache.rs b/src/lazy/lazy_value_cache.rs new file mode 100644 index 00000000..7e9df0b9 --- /dev/null +++ b/src/lazy/lazy_value_cache.rs @@ -0,0 +1,6 @@ +use crate::lazy::decoder::{LazyDecoder, LazyRawValueExpr}; +use bumpalo::collections::Vec as BumpVec; + +pub struct RawValueExprCache<'top, D: LazyDecoder> { + values: BumpVec<'top, LazyRawValueExpr<'top, D>>, +} diff --git a/src/lazy/mod.rs b/src/lazy/mod.rs index f1bf52d9..00d0dd6f 100644 --- a/src/lazy/mod.rs +++ b/src/lazy/mod.rs @@ -5,8 +5,9 @@ pub mod any_encoding; pub mod binary; pub mod bytes_ref; pub mod decoder; -pub(crate) mod encoding; +pub mod encoding; pub mod expanded; +pub mod lazy_value_cache; mod never; pub mod raw_stream_item; pub mod raw_value_ref; diff --git a/src/lazy/never.rs b/src/lazy/never.rs index b794bb51..9281046d 100644 --- a/src/lazy/never.rs +++ b/src/lazy/never.rs @@ -1,6 +1,7 @@ -use crate::lazy::decoder::LazyDecoder; -use crate::lazy::expanded::macro_evaluator::{ArgumentKind, MacroInvocation, ToArgumentKind}; -use crate::lazy::expanded::EncodingContext; +use std::fmt::Debug; + +use crate::lazy::decoder::{LazyDecoder, LazyRawValueExpr}; +use crate::lazy::expanded::macro_evaluator::{MacroExpr, RawEExpression}; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::IonResult; @@ -12,28 +13,21 @@ pub enum Never { // Ion 1.0 uses `Never` as a placeholder type for MacroInvocation. // The compiler should optimize these methods away. -impl<'data, D: LazyDecoder<'data>> MacroInvocation<'data, D> for Never { - type ArgumentExpr = Never; - // This uses a Box to avoid defining yet another placeholder type. - type ArgumentsIterator = Box>>; +impl<'top, D: LazyDecoder = Self>> RawEExpression<'top, D> for Never { + // These use Box to avoid defining yet another placeholder type. + type RawArgumentsIterator<'a> = Box>>>; - fn id(&self) -> MacroIdRef<'_> { + fn id(&self) -> MacroIdRef<'top> { unreachable!("macro in Ion 1.0 (method: id)") } - fn arguments(&self) -> Self::ArgumentsIterator { + fn raw_arguments(&self) -> Self::RawArgumentsIterator<'_> { unreachable!("macro in Ion 1.0 (method: arguments)") } } -impl<'data, D: LazyDecoder<'data>> ToArgumentKind<'data, D, Self> for Never { - fn to_arg_expr<'top>( - self, - _context: EncodingContext<'top>, - ) -> ArgumentKind<'top, 'data, D, Self> - where - Self: 'top, - { - unreachable!("macro in Ion 1.0 (method: to_arg_expr)") +impl<'top, D: LazyDecoder> From for MacroExpr<'top, D> { + fn from(_value: Never) -> Self { + unreachable!("macro in Ion 1.0 (method: into)") } } diff --git a/src/lazy/raw_stream_item.rs b/src/lazy/raw_stream_item.rs index 13f10138..5cef4fc9 100644 --- a/src/lazy/raw_stream_item.rs +++ b/src/lazy/raw_stream_item.rs @@ -1,25 +1,28 @@ use crate::lazy::decoder::LazyDecoder; -use crate::lazy::encoding::EncodingWithMacroSupport; use crate::result::IonFailure; use crate::{IonError, IonResult}; +use std::fmt::Debug; #[derive(Debug)] /// Raw stream components that a RawReader may encounter. -pub enum RawStreamItem<'data, D: LazyDecoder<'data>> { +pub enum RawStreamItem { /// An Ion Version Marker (IVM) indicating the Ion major and minor version that were used to /// encode the values that follow. VersionMarker(u8, u8), /// An Ion value whose data has not yet been read. For more information about how to read its /// data and (in the case of containers) access any nested values, see the documentation /// for [`LazyRawBinaryValue`](crate::lazy::binary::raw::value::LazyRawBinaryValue). - Value(D::Value), + Value(V), /// An Ion 1.1+ macro invocation. Ion 1.0 readers will never return a macro invocation. - EExpression(D::MacroInvocation), + EExpression(E), /// The end of the stream EndOfStream, } -impl<'data, D: LazyDecoder<'data>> RawStreamItem<'data, D> { +pub type LazyRawStreamItem<'top, D> = + RawStreamItem<::Value<'top>, ::EExpression<'top>>; + +impl RawStreamItem { /// If this item is an Ion version marker (IVM), returns `Some((major, minor))` indicating the /// version. Otherwise, returns `None`. pub fn version_marker(&self) -> Option<(u8, u8)> { @@ -38,7 +41,7 @@ impl<'data, D: LazyDecoder<'data>> RawStreamItem<'data, D> { } /// If this item is a value, returns `Some(&LazyValue)`. Otherwise, returns `None`. - pub fn value(&self) -> Option<&D::Value> { + pub fn value(&self) -> Option<&V> { if let Self::Value(value) = self { Some(value) } else { @@ -48,17 +51,15 @@ impl<'data, D: LazyDecoder<'data>> RawStreamItem<'data, D> { /// Like [`Self::value`], but returns a [`IonError::Decoding`] if this item is not /// a value. - pub fn expect_value(self) -> IonResult { + pub fn expect_value(self) -> IonResult { if let Self::Value(value) = self { Ok(value) } else { IonResult::decoding_error(format!("expected value, found {:?}", self)) } } -} -impl<'data, D: LazyDecoder<'data> + EncodingWithMacroSupport> RawStreamItem<'data, D> { - pub fn as_macro_invocation(&self) -> Option<&D::MacroInvocation> { + pub fn as_macro_invocation(&self) -> Option<&E> { if let Self::EExpression(m) = self { Some(m) } else { @@ -66,7 +67,7 @@ impl<'data, D: LazyDecoder<'data> + EncodingWithMacroSupport> RawStreamItem<'dat } } - pub fn expect_macro_invocation(self) -> IonResult { + pub fn expect_macro_invocation(self) -> IonResult { if let Self::EExpression(m) = self { Ok(m) } else { diff --git a/src/lazy/raw_value_ref.rs b/src/lazy/raw_value_ref.rs index 43819740..7c169960 100644 --- a/src/lazy/raw_value_ref.rs +++ b/src/lazy/raw_value_ref.rs @@ -10,24 +10,24 @@ use std::fmt::{Debug, Formatter}; /// or text literal). If it is a symbol ID, a symbol table will be needed to find its associated text. /// /// For a resolved version of this type, see [crate::lazy::value_ref::ValueRef]. -pub enum RawValueRef<'data, D: LazyDecoder<'data>> { +pub enum RawValueRef<'top, D: LazyDecoder> { Null(IonType), Bool(bool), Int(Int), Float(f64), Decimal(Decimal), Timestamp(Timestamp), - String(StrRef<'data>), - Symbol(RawSymbolTokenRef<'data>), - Blob(BytesRef<'data>), - Clob(BytesRef<'data>), - SExp(D::SExp), - List(D::List), - Struct(D::Struct), + String(StrRef<'top>), + Symbol(RawSymbolTokenRef<'top>), + Blob(BytesRef<'top>), + Clob(BytesRef<'top>), + SExp(D::SExp<'top>), + List(D::List<'top>), + Struct(D::Struct<'top>), } // Provides equality for scalar types, but not containers. -impl<'data, D: LazyDecoder<'data>> PartialEq for RawValueRef<'data, D> { +impl<'top, D: LazyDecoder> PartialEq for RawValueRef<'top, D> { fn eq(&self, other: &Self) -> bool { use RawValueRef::*; match (self, other) { @@ -48,7 +48,7 @@ impl<'data, D: LazyDecoder<'data>> PartialEq for RawValueRef<'data, D> { } } -impl<'data, D: LazyDecoder<'data>> Debug for RawValueRef<'data, D> { +impl<'top, D: LazyDecoder> Debug for RawValueRef<'top, D> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { RawValueRef::Null(ion_type) => write!(f, "null.{}", ion_type), @@ -68,7 +68,7 @@ impl<'data, D: LazyDecoder<'data>> Debug for RawValueRef<'data, D> { } } -impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { +impl<'top, D: LazyDecoder> RawValueRef<'top, D> { pub fn expect_null(self) -> IonResult { if let RawValueRef::Null(ion_type) = self { Ok(ion_type) @@ -125,7 +125,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { } } - pub fn expect_string(self) -> IonResult> { + pub fn expect_string(self) -> IonResult> { if let RawValueRef::String(s) = self { Ok(s) } else { @@ -133,7 +133,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { } } - pub fn expect_symbol(self) -> IonResult> { + pub fn expect_symbol(self) -> IonResult> { if let RawValueRef::Symbol(s) = self { Ok(s.clone()) } else { @@ -141,7 +141,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { } } - pub fn expect_blob(self) -> IonResult> { + pub fn expect_blob(self) -> IonResult> { if let RawValueRef::Blob(b) = self { Ok(b) } else { @@ -149,7 +149,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { } } - pub fn expect_clob(self) -> IonResult> { + pub fn expect_clob(self) -> IonResult> { if let RawValueRef::Clob(c) = self { Ok(c) } else { @@ -157,7 +157,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { } } - pub fn expect_list(self) -> IonResult { + pub fn expect_list(self) -> IonResult> { if let RawValueRef::List(s) = self { Ok(s) } else { @@ -165,7 +165,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { } } - pub fn expect_sexp(self) -> IonResult { + pub fn expect_sexp(self) -> IonResult> { if let RawValueRef::SExp(s) = self { Ok(s) } else { @@ -173,7 +173,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { } } - pub fn expect_struct(self) -> IonResult { + pub fn expect_struct(self) -> IonResult> { if let RawValueRef::Struct(s) = self { Ok(s) } else { diff --git a/src/lazy/reader.rs b/src/lazy/reader.rs index 74b9c7be..8d2eafff 100644 --- a/src/lazy/reader.rs +++ b/src/lazy/reader.rs @@ -9,6 +9,7 @@ use crate::lazy::encoding::{BinaryEncoding_1_0, TextEncoding_1_0, TextEncoding_1 use crate::lazy::system_reader::{ LazySystemAnyReader, LazySystemBinaryReader, LazySystemReader, LazySystemTextReader_1_1, }; +use crate::lazy::text::raw::v1_1::reader::MacroAddress; use crate::lazy::value::LazyValue; use crate::result::IonFailure; use crate::{IonError, IonResult}; @@ -60,27 +61,33 @@ use crate::{IonError, IonResult}; ///# Ok(()) ///# } /// ``` -pub struct LazyApplicationReader<'data, D: LazyDecoder<'data>> { +pub struct LazyApplicationReader<'data, D: LazyDecoder> { system_reader: LazySystemReader<'data, D>, } -pub(crate) enum NextApplicationValue<'top, 'data, D: LazyDecoder<'data>> { - ApplicationValue(LazyValue<'top, 'data, D>), +pub(crate) enum NextApplicationValue<'top, D: LazyDecoder> { + ApplicationValue(LazyValue<'top, D>), SystemValue, EndOfStream, } -impl<'data, D: LazyDecoder<'data>> LazyApplicationReader<'data, D> { +impl<'data, D: LazyDecoder> LazyApplicationReader<'data, D> { /// Returns the next top-level value in the input stream as `Ok(Some(lazy_value))`. /// If there are no more top-level values in the stream, returns `Ok(None)`. /// If the next value is incomplete (that is: only part of it is in the input buffer) or if the /// input buffer contains invalid data, returns `Err(ion_error)`. - pub fn next<'top>(&'top mut self) -> IonResult>> { + pub fn next<'top>(&'top mut self) -> IonResult>> + where + 'data: 'top, + { self.system_reader.next_value() } /// Like [`Self::next`], but returns an `IonError` if there are no more values in the stream. - pub fn expect_next<'top>(&'top mut self) -> IonResult> { + pub fn expect_next<'top>(&'top mut self) -> IonResult> + where + 'data: 'top, + { self.next()? .ok_or_else(|| IonError::decoding_error("expected another top-level value")) } @@ -116,13 +123,21 @@ impl<'data> LazyTextReader_1_1<'data> { let system_reader = LazySystemTextReader_1_1::new(ion_data); Ok(LazyApplicationReader { system_reader }) } + + // Temporary method for defining/testing templates. + // TODO: Remove this when the reader can understand 1.1 encoding directives. + pub fn register_template(&mut self, template_definition: &str) -> IonResult { + self.system_reader + .expanding_reader + .register_template(template_definition) + } } -pub struct LazyElementIterator<'iter, 'data, D: LazyDecoder<'data>> { +pub struct LazyElementIterator<'iter, 'data, D: LazyDecoder> { lazy_reader: &'iter mut LazyApplicationReader<'data, D>, } -impl<'iter, 'data, D: LazyDecoder<'data>> Iterator for LazyElementIterator<'iter, 'data, D> { +impl<'iter, 'data, D: LazyDecoder> Iterator for LazyElementIterator<'iter, 'data, D> { type Item = IonResult; fn next(&mut self) -> Option { @@ -134,7 +149,7 @@ impl<'iter, 'data, D: LazyDecoder<'data>> Iterator for LazyElementIterator<'iter } } -impl<'data, D: LazyDecoder<'data>> ElementReader for LazyApplicationReader<'data, D> { +impl<'data, D: LazyDecoder> ElementReader for LazyApplicationReader<'data, D> { type ElementIterator<'a> = LazyElementIterator<'a, 'data, D> where Self: 'a,; fn read_next_element(&mut self) -> IonResult> { @@ -153,7 +168,6 @@ impl<'data, D: LazyDecoder<'data>> ElementReader for LazyApplicationReader<'data #[cfg(test)] mod tests { - use super::*; use crate::element::writer::ElementWriter; use crate::element::Element; use crate::lazy::value_ref::ValueRef; @@ -161,6 +175,8 @@ mod tests { ion_list, ion_sexp, ion_struct, BinaryWriterBuilder, Int, IonResult, IonType, IonWriter, }; + use super::*; + fn to_binary_ion(text_ion: &str) -> IonResult> { let mut buffer = Vec::new(); let mut writer = BinaryWriterBuilder::default().build(&mut buffer)?; diff --git a/src/lazy/sequence.rs b/src/lazy/sequence.rs index 2bc12232..e4a0f6fa 100644 --- a/src/lazy/sequence.rs +++ b/src/lazy/sequence.rs @@ -48,15 +48,15 @@ use crate::{IonError, IonResult}; ///# Ok(()) ///# } /// ``` -pub struct LazyList<'top, 'data, D: LazyDecoder<'data>> { - pub(crate) expanded_list: LazyExpandedList<'top, 'data, D>, +pub struct LazyList<'top, D: LazyDecoder> { + pub(crate) expanded_list: LazyExpandedList<'top, D>, } -pub type LazyBinarySequence<'top, 'data> = LazyList<'top, 'data, BinaryEncoding_1_0>; +pub type LazyBinarySequence<'top, 'data> = LazyList<'top, BinaryEncoding_1_0>; -impl<'top, 'data, D: LazyDecoder<'data>> LazyList<'top, 'data, D> { +impl<'top, D: LazyDecoder> LazyList<'top, D> { /// Returns an iterator over the values in this sequence. See: [`LazyValue`]. - pub fn iter(&self) -> ListIterator<'top, 'data, D> { + pub fn iter(&self) -> ListIterator<'top, D> { ListIterator { expanded_list_iter: self.expanded_list.iter(), } @@ -90,7 +90,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyList<'top, 'data, D> { ///# Ok(()) ///# } /// ``` - pub fn annotations(&self) -> AnnotationsIterator<'top, 'data, D> { + pub fn annotations(&self) -> AnnotationsIterator<'top, D> { AnnotationsIterator { expanded_annotations: self.expanded_list.annotations(), symbol_table: self.expanded_list.context.symbol_table, @@ -98,10 +98,10 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyList<'top, 'data, D> { } } -impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Sequence { +impl<'top, D: LazyDecoder> TryFrom> for Sequence { type Error = IonError; - fn try_from(lazy_sequence: LazyList<'top, 'data, D>) -> Result { + fn try_from(lazy_sequence: LazyList<'top, D>) -> Result { let sequence: Sequence = lazy_sequence .iter() .map(|v| Element::try_from(v?)) @@ -111,10 +111,10 @@ impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for S } } -impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Element { +impl<'top, D: LazyDecoder> TryFrom> for Element { type Error = IonError; - fn try_from(lazy_list: LazyList<'top, 'data, D>) -> Result { + fn try_from(lazy_list: LazyList<'top, D>) -> Result { let annotations: Annotations = lazy_list.annotations().try_into()?; let sequence: Sequence = lazy_list.try_into()?; let value = Value::List(sequence); @@ -122,21 +122,21 @@ impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for E } } -impl<'a, 'top, 'data: 'top, D: LazyDecoder<'data>> IntoIterator for &'a LazyList<'top, 'data, D> { - type Item = IonResult>; - type IntoIter = ListIterator<'top, 'data, D>; +impl<'a, 'top, 'data: 'top, D: LazyDecoder> IntoIterator for &'a LazyList<'top, D> { + type Item = IonResult>; + type IntoIter = ListIterator<'top, D>; fn into_iter(self) -> Self::IntoIter { self.iter() } } -pub struct ListIterator<'top, 'data, D: LazyDecoder<'data>> { - expanded_list_iter: ExpandedListIterator<'top, 'data, D>, +pub struct ListIterator<'top, D: LazyDecoder> { + expanded_list_iter: ExpandedListIterator<'top, D>, } -impl<'top, 'data, D: LazyDecoder<'data>> Iterator for ListIterator<'top, 'data, D> { - type Item = IonResult>; +impl<'top, D: LazyDecoder> Iterator for ListIterator<'top, D> { + type Item = IonResult>; fn next(&mut self) -> Option { let expanded_value = match self.expanded_list_iter.next() { @@ -150,7 +150,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> Iterator for ListIterator<'top, 'data, } } -impl<'top, 'data, D: LazyDecoder<'data>> Debug for LazyList<'top, 'data, D> { +impl<'top, D: LazyDecoder> Debug for LazyList<'top, D> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "[")?; for value in self { @@ -164,11 +164,11 @@ impl<'top, 'data, D: LazyDecoder<'data>> Debug for LazyList<'top, 'data, D> { // ===== SExps ===== -pub struct LazySExp<'top, 'data, D: LazyDecoder<'data>> { - pub(crate) expanded_sexp: LazyExpandedSExp<'top, 'data, D>, +pub struct LazySExp<'top, D: LazyDecoder> { + pub(crate) expanded_sexp: LazyExpandedSExp<'top, D>, } -impl<'top, 'data, D: LazyDecoder<'data>> Debug for LazySExp<'top, 'data, D> { +impl<'top, D: LazyDecoder> Debug for LazySExp<'top, D> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "(")?; for value in self { @@ -180,9 +180,9 @@ impl<'top, 'data, D: LazyDecoder<'data>> Debug for LazySExp<'top, 'data, D> { } } -impl<'top, 'data, D: LazyDecoder<'data>> LazySExp<'top, 'data, D> { +impl<'top, D: LazyDecoder> LazySExp<'top, D> { /// Returns an iterator over the values in this sequence. See: [`LazyValue`]. - pub fn iter(&self) -> SExpIterator<'top, 'data, D> { + pub fn iter(&self) -> SExpIterator<'top, D> { SExpIterator { expanded_sexp_iter: self.expanded_sexp.iter(), } @@ -216,7 +216,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazySExp<'top, 'data, D> { ///# Ok(()) ///# } /// ``` - pub fn annotations(&self) -> AnnotationsIterator<'top, 'data, D> { + pub fn annotations(&self) -> AnnotationsIterator<'top, D> { AnnotationsIterator { expanded_annotations: self.expanded_sexp.annotations(), symbol_table: self.expanded_sexp.context.symbol_table, @@ -224,10 +224,10 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazySExp<'top, 'data, D> { } } -impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Sequence { +impl<'top, D: LazyDecoder> TryFrom> for Sequence { type Error = IonError; - fn try_from(lazy_sequence: LazySExp<'top, 'data, D>) -> Result { + fn try_from(lazy_sequence: LazySExp<'top, D>) -> Result { let sequence: Sequence = lazy_sequence .iter() .map(|v| Element::try_from(v?)) @@ -237,10 +237,10 @@ impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for S } } -impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Element { +impl<'top, D: LazyDecoder> TryFrom> for Element { type Error = IonError; - fn try_from(lazy_sequence: LazySExp<'top, 'data, D>) -> Result { + fn try_from(lazy_sequence: LazySExp<'top, D>) -> Result { let annotations: Annotations = lazy_sequence.annotations().try_into()?; let sequence: Sequence = lazy_sequence.try_into()?; let value = Value::SExp(sequence); @@ -248,21 +248,21 @@ impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for E } } -impl<'a, 'top, 'data: 'top, D: LazyDecoder<'data>> IntoIterator for &'a LazySExp<'top, 'data, D> { - type Item = IonResult>; - type IntoIter = SExpIterator<'top, 'data, D>; +impl<'a, 'top, 'data: 'top, D: LazyDecoder> IntoIterator for &'a LazySExp<'top, D> { + type Item = IonResult>; + type IntoIter = SExpIterator<'top, D>; fn into_iter(self) -> Self::IntoIter { self.iter() } } -pub struct SExpIterator<'top, 'data, D: LazyDecoder<'data>> { - expanded_sexp_iter: ExpandedSExpIterator<'top, 'data, D>, +pub struct SExpIterator<'top, D: LazyDecoder> { + expanded_sexp_iter: ExpandedSExpIterator<'top, D>, } -impl<'top, 'data, D: LazyDecoder<'data>> Iterator for SExpIterator<'top, 'data, D> { - type Item = IonResult>; +impl<'top, D: LazyDecoder> Iterator for SExpIterator<'top, D> { + type Item = IonResult>; fn next(&mut self) -> Option { let expanded_value = match self.expanded_sexp_iter.next() { diff --git a/src/lazy/struct.rs b/src/lazy/struct.rs index dc9abbac..f4401a32 100644 --- a/src/lazy/struct.rs +++ b/src/lazy/struct.rs @@ -45,15 +45,15 @@ use crate::{ ///# } /// ``` #[derive(Clone)] -pub struct LazyStruct<'top, 'data, D: LazyDecoder<'data>> { - pub(crate) expanded_struct: LazyExpandedStruct<'top, 'data, D>, +pub struct LazyStruct<'top, D: LazyDecoder> { + pub(crate) expanded_struct: LazyExpandedStruct<'top, D>, } -pub type LazyBinaryStruct<'top, 'data> = LazyStruct<'top, 'data, BinaryEncoding_1_0>; +pub type LazyBinaryStruct<'top> = LazyStruct<'top, BinaryEncoding_1_0>; // Best-effort debug formatting for LazyStruct. Any failures that occur during reading will result // in the output being silently truncated. -impl<'top, 'data, D: LazyDecoder<'data>> Debug for LazyStruct<'top, 'data, D> { +impl<'top, D: LazyDecoder> Debug for LazyStruct<'top, D> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "{{")?; for field in self { @@ -68,9 +68,9 @@ impl<'top, 'data, D: LazyDecoder<'data>> Debug for LazyStruct<'top, 'data, D> { } } -impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> { +impl<'top, D: LazyDecoder> LazyStruct<'top, D> { /// Returns an iterator over this struct's fields. See [`LazyField`]. - pub fn iter(&self) -> StructIterator<'top, 'data, D> { + pub fn iter(&self) -> StructIterator<'top, D> { StructIterator { expanded_struct_iter: self.expanded_struct.iter(), } @@ -107,11 +107,11 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> { ///# Ok(()) ///# } /// ``` - pub fn find(&self, name: &str) -> IonResult>> { + pub fn find(&self, name: &str) -> IonResult>> { for field in self { let field = field?; if field.name()? == name { - let expanded_value = field.expanded_field.value().clone(); + let expanded_value = *field.expanded_field.value(); let value = LazyValue::new(expanded_value); return Ok(Some(value)); } @@ -139,7 +139,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> { ///# Ok(()) ///# } /// ``` - pub fn find_expected(&self, name: &str) -> IonResult> { + pub fn find_expected(&self, name: &str) -> IonResult> { self.find(name)? .ok_or_else(|| IonError::decoding_error(format!("missing required field {}", name))) } @@ -165,10 +165,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> { ///# Ok(()) ///# } /// ``` - pub fn get(&self, name: &str) -> IonResult>> - where - 'data: 'top, - { + pub fn get(&self, name: &str) -> IonResult>> { self.find(name)?.map(|f| f.read()).transpose() } @@ -192,10 +189,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> { ///# Ok(()) ///# } /// ``` - pub fn get_expected(&self, name: &str) -> IonResult> - where - 'data: 'top, - { + pub fn get_expected(&self, name: &str) -> IonResult> { self.get(name)?.ok_or_else(move || { IonError::decoding_error(format!("missing required field {}", name)) }) @@ -230,7 +224,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> { ///# Ok(()) ///# } /// ``` - pub fn annotations(&self) -> AnnotationsIterator<'top, 'data, D> { + pub fn annotations(&self) -> AnnotationsIterator<'top, D> { AnnotationsIterator { expanded_annotations: self.expanded_struct.annotations(), symbol_table: self.expanded_struct.context.symbol_table, @@ -239,11 +233,11 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> { } /// A single field within a [`LazyStruct`]. -pub struct LazyField<'top, 'data, D: LazyDecoder<'data>> { - pub(crate) expanded_field: LazyExpandedField<'top, 'data, D>, +pub struct LazyField<'top, D: LazyDecoder> { + pub(crate) expanded_field: LazyExpandedField<'top, D>, } -impl<'top, 'data, D: LazyDecoder<'data>> Debug for LazyField<'top, 'data, D> { +impl<'top, D: LazyDecoder> Debug for LazyField<'top, D> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( f, @@ -254,10 +248,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> Debug for LazyField<'top, 'data, D> { } } -impl<'top, 'data, D: LazyDecoder<'data>> LazyField<'top, 'data, D> -where - 'data: 'top, -{ +impl<'top, D: LazyDecoder> LazyField<'top, D> { /// Returns a symbol representing the name of this field. pub fn name(&self) -> IonResult> { let field_name = self.expanded_field.name(); @@ -278,27 +269,27 @@ where /// Returns a lazy value representing the value of this field. To access the value's data, /// see [`LazyValue::read`]. - pub fn value(&self) -> LazyValue<'top, 'data, D> { + pub fn value(&self) -> LazyValue<'top, D> { LazyValue { - expanded_value: self.expanded_field.value().clone(), + expanded_value: *self.expanded_field.value(), } } } -pub struct StructIterator<'top, 'data, D: LazyDecoder<'data>> { - pub(crate) expanded_struct_iter: ExpandedStructIterator<'top, 'data, D>, +pub struct StructIterator<'top, D: LazyDecoder> { + pub(crate) expanded_struct_iter: ExpandedStructIterator<'top, D>, } -impl<'top, 'data, D: LazyDecoder<'data>> Iterator for StructIterator<'top, 'data, D> { - type Item = IonResult>; +impl<'top, D: LazyDecoder> Iterator for StructIterator<'top, D> { + type Item = IonResult>; fn next(&mut self) -> Option { StructIterator::next_field(self).transpose() } } -impl<'top, 'data, D: LazyDecoder<'data>> StructIterator<'top, 'data, D> { - pub fn next_field(&mut self) -> IonResult>> { +impl<'top, D: LazyDecoder> StructIterator<'top, D> { + pub fn next_field(&mut self) -> IonResult>> { let expanded_field = match self.expanded_struct_iter.next() { Some(expanded_field) => expanded_field?, None => return Ok(None), @@ -309,10 +300,10 @@ impl<'top, 'data, D: LazyDecoder<'data>> StructIterator<'top, 'data, D> { } } -impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Struct { +impl<'top, D: LazyDecoder> TryFrom> for Struct { type Error = IonError; - fn try_from(lazy_struct: LazyStruct<'top, 'data, D>) -> Result { + fn try_from(lazy_struct: LazyStruct<'top, D>) -> Result { let mut builder = StructBuilder::new(); for field in &lazy_struct { let field = field?; @@ -322,19 +313,19 @@ impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for } } -impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Element { +impl<'top, D: LazyDecoder> TryFrom> for Element { type Error = IonError; - fn try_from(lazy_struct: LazyStruct<'top, 'data, D>) -> Result { + fn try_from(lazy_struct: LazyStruct<'top, D>) -> Result { let annotations: Annotations = lazy_struct.annotations().try_into()?; let struct_: Struct = lazy_struct.try_into()?; Ok(struct_.with_annotations(annotations)) } } -impl<'a, 'top, 'data: 'top, D: LazyDecoder<'data>> IntoIterator for &'a LazyStruct<'top, 'data, D> { - type Item = IonResult>; - type IntoIter = StructIterator<'top, 'data, D>; +impl<'a, 'top, 'data: 'top, D: LazyDecoder> IntoIterator for &'a LazyStruct<'top, D> { + type Item = IonResult>; + type IntoIter = StructIterator<'top, D>; fn into_iter(self) -> Self::IntoIter { self.iter() diff --git a/src/lazy/system_reader.rs b/src/lazy/system_reader.rs index 1dc7d77c..034a4778 100644 --- a/src/lazy/system_reader.rs +++ b/src/lazy/system_reader.rs @@ -1,19 +1,11 @@ #![allow(non_camel_case_types)] -use std::cell::RefCell; - -use bumpalo::Bump as BumpAllocator; - use crate::lazy::any_encoding::{AnyEncoding, LazyRawAnyReader}; use crate::lazy::binary::raw::reader::LazyRawBinaryReader; use crate::lazy::decoder::LazyDecoder; use crate::lazy::decoder::LazyRawReader; use crate::lazy::encoding::{BinaryEncoding_1_0, TextEncoding_1_0, TextEncoding_1_1}; -use crate::lazy::expanded::macro_table::MacroTable; -use crate::lazy::expanded::{ - EncodingContext, ExpandedStreamItem, ExpandedValueRef, LazyExpandedValue, LazyExpandingReader, -}; -use crate::lazy::r#struct::LazyStruct; +use crate::lazy::expanded::{ExpandedValueRef, LazyExpandedValue, LazyExpandingReader}; use crate::lazy::system_stream_item::SystemStreamItem; use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; use crate::lazy::value::LazyValue; @@ -38,10 +30,11 @@ const SYMBOLS: RawSymbolTokenRef = RawSymbolTokenRef::SymbolId(7); /// which may contain either a scalar value or a lazy container that may itself be traversed. /// /// The values that the reader yields ([`LazyValue`], -/// [`crate::lazy::sequence::LazyBinarySequence`], and [`LazyStruct`]) are immutable -/// references to the data stream, and remain valid until [`LazySystemReader::next_item`] is -/// called again to advance the reader to the next top level value. This means that these references -/// can be stored, read, and re-read as long as the reader remains on the same top-level value. +/// [`LazyBinarySequence`](crate::lazy::sequence::LazyBinarySequence) and +/// [`LazyStruct`](crate::lazy::struct::LazyStruct)), are immutable references to the data stream, +/// and remain valid until [`LazySystemReader::next_item`] is called again to advance the reader to +/// the next top level value. This means that these references can be stored, read, and re-read as +/// long as the reader remains on the same top-level value. /// ``` ///# use ion_rs::IonResult; ///# fn main() -> IonResult<()> { @@ -76,16 +69,8 @@ const SYMBOLS: RawSymbolTokenRef = RawSymbolTokenRef::SymbolId(7); ///# Ok(()) ///# } /// ``` -pub struct LazySystemReader<'data, D: LazyDecoder<'data>> { - // TODO: Remove this RefCell when the Polonius borrow checker is available. - // See: https://github.com/rust-lang/rust/issues/70255 - expanding_reader: RefCell>, - // TODO: Make the symbol and macro tables traits on `D` such that they can be configured - // statically. Then 1.0 types can use `Never` for the macro table. - symbol_table: SymbolTable, - macro_table: MacroTable, - allocator: BumpAllocator, - pending_lst: PendingLst, +pub struct LazySystemReader<'data, D: LazyDecoder> { + pub(crate) expanding_reader: LazyExpandingReader<'data, D>, } pub type LazySystemBinaryReader<'data> = LazySystemReader<'data, BinaryEncoding_1_0>; @@ -96,25 +81,28 @@ pub type LazySystemAnyReader<'data> = LazySystemReader<'data, AnyEncoding>; // If the reader encounters a symbol table in the stream, it will store all of the symbols that // the table defines in this structure so that they may be applied when the reader next advances. -struct PendingLst { - is_lst_append: bool, - symbols: Vec>, +#[derive(Default)] +pub(crate) struct PendingLst { + pub(crate) has_changes: bool, + pub(crate) is_lst_append: bool, + pub(crate) symbols: Vec>, +} + +impl PendingLst { + pub fn new() -> Self { + Self { + has_changes: false, + is_lst_append: false, + symbols: Vec::new(), + } + } } impl<'data> LazySystemAnyReader<'data> { pub fn new(ion_data: &'data [u8]) -> LazySystemAnyReader<'data> { let raw_reader = LazyRawAnyReader::new(ion_data); let expanding_reader = LazyExpandingReader::new(raw_reader); - LazySystemReader { - expanding_reader: RefCell::new(expanding_reader), - symbol_table: SymbolTable::new(), - macro_table: MacroTable::new(), - allocator: BumpAllocator::new(), - pending_lst: PendingLst { - is_lst_append: false, - symbols: Vec::new(), - }, - } + LazySystemReader { expanding_reader } } } @@ -122,16 +110,7 @@ impl<'data> LazySystemBinaryReader<'data> { pub(crate) fn new(ion_data: &'data [u8]) -> LazySystemBinaryReader<'data> { let raw_reader = LazyRawBinaryReader::new(ion_data); let expanding_reader = LazyExpandingReader::new(raw_reader); - LazySystemReader { - expanding_reader: RefCell::new(expanding_reader), - symbol_table: SymbolTable::new(), - macro_table: MacroTable::new(), - allocator: BumpAllocator::new(), - pending_lst: PendingLst { - is_lst_append: false, - symbols: Vec::new(), - }, - } + LazySystemReader { expanding_reader } } } @@ -139,23 +118,14 @@ impl<'data> LazySystemTextReader_1_1<'data> { pub(crate) fn new(ion_data: &'data [u8]) -> LazySystemTextReader_1_1<'data> { let raw_reader = LazyRawTextReader_1_1::new(ion_data); let expanding_reader = LazyExpandingReader::new(raw_reader); - LazySystemReader { - expanding_reader: RefCell::new(expanding_reader), - symbol_table: SymbolTable::new(), - macro_table: MacroTable::new(), - allocator: BumpAllocator::new(), - pending_lst: PendingLst { - is_lst_append: false, - symbols: Vec::new(), - }, - } + LazySystemReader { expanding_reader } } } -impl<'data, D: LazyDecoder<'data>> LazySystemReader<'data, D> { +impl<'data, D: LazyDecoder> LazySystemReader<'data, D> { // Returns `true` if the provided [`LazyRawValue`] is a struct whose first annotation is // `$ion_symbol_table`. - fn is_symbol_table_struct(lazy_value: &'_ LazyExpandedValue<'_, 'data, D>) -> IonResult { + pub fn is_symbol_table_struct(lazy_value: &'_ LazyExpandedValue<'_, D>) -> IonResult { if lazy_value.ion_type() != IonType::Struct { return Ok(false); } @@ -167,114 +137,33 @@ impl<'data, D: LazyDecoder<'data>> LazySystemReader<'data, D> { /// Returns the next top-level stream item (IVM, Symbol Table, Value, or Nothing) as a /// [`SystemStreamItem`]. - pub fn next_item<'top>(&'top mut self) -> IonResult> { - // Deconstruct the reader to get simultaneous mutable references to multiple fields - let LazySystemReader { - ref expanding_reader, - ref symbol_table, - macro_table, - allocator, - pending_lst, - } = self; - Self::apply_pending_lst(symbol_table, pending_lst); - let context = EncodingContext { - macro_table, - symbol_table, - allocator, - }; - let lazy_expanded_value = match expanding_reader.borrow_mut().next(context)? { - ExpandedStreamItem::VersionMarker(major, minor) => { - return Ok(SystemStreamItem::VersionMarker(major, minor)); - } - ExpandedStreamItem::Value(lazy_raw_value) => lazy_raw_value, - ExpandedStreamItem::EndOfStream => return Ok(SystemStreamItem::EndOfStream), - }; - if Self::is_symbol_table_struct(&lazy_expanded_value)? { - Self::process_symbol_table(pending_lst, &lazy_expanded_value)?; - let lazy_struct = LazyStruct { - expanded_struct: lazy_expanded_value.read()?.expect_struct()?, - }; - return Ok(SystemStreamItem::SymbolTable(lazy_struct)); - } - let lazy_value = LazyValue::new(lazy_expanded_value); - Ok(SystemStreamItem::Value(lazy_value)) + pub fn next_item<'top>(&'top mut self) -> IonResult> + where + 'data: 'top, + { + self.expanding_reader.next_item() } /// Returns the next value that is part of the application data model, bypassing all encoding /// artifacts (IVMs, symbol tables). - // It would make more sense for this logic to live in the user-level `LazyReader` as a simple - // loop over LazySystemReader::next. However, due to a limitation in the borrow checker[1], it's - // not able to determine that calling LazySystemReader::next() multiple times in the same lexical - // scope is safe. Rust's experimental borrow checker, Polonius, is able to understand it. - // Until Polonius is available, the method will live here instead. - // [1]: https://github.com/rust-lang/rust/issues/70255 - pub fn next_value<'top>(&'top mut self) -> IonResult>> { - // Deconstruct the reader to get simultaneous mutable references to multiple fields - let LazySystemReader { - ref expanding_reader, - ref symbol_table, - macro_table, - allocator, - pending_lst, - } = self; - - loop { - Self::apply_pending_lst(symbol_table, pending_lst); - let context = EncodingContext { - symbol_table, - macro_table, - allocator, - }; - let lazy_expanded_value = match expanding_reader.borrow_mut().next(context)? { - ExpandedStreamItem::VersionMarker(_major, _minor) => { - // TODO: For text, switch the underlying reader as needed - continue; - } - ExpandedStreamItem::Value(lazy_raw_value) => lazy_raw_value, - ExpandedStreamItem::EndOfStream => return Ok(None), - }; - if Self::is_symbol_table_struct(&lazy_expanded_value)? { - Self::process_symbol_table(pending_lst, &lazy_expanded_value)?; - drop(lazy_expanded_value); - continue; - } - let lazy_value = LazyValue::new(lazy_expanded_value); - return Ok(Some(lazy_value)); - } + pub fn next_value<'top>(&'top mut self) -> IonResult>> + where + 'data: 'top, + { + self.expanding_reader.next_value() } // If the last stream item the reader visited was a symbol table, its `PendingLst` will // contain new symbols that need to be added to the local symbol table. - fn apply_pending_lst(symbol_table: &SymbolTable, pending_lst: &mut PendingLst) { - let ptr = symbol_table as *const SymbolTable; - - // XXX: This `unsafe` is a workaround for https://github.com/rust-lang/rust/issues/70255 - // There is a rustc fix for this limitation on the horizon. See: - // https://smallcultfollowing.com/babysteps/blog/2023/09/22/polonius-part-1/ - // Indeed, using the experimental `-Zpolonius` flag on the nightly compiler allows the - // version of this code without this `unsafe` hack to work. The alternative to the - // hack is wrapping the SymbolTable in something like `RefCell`, which adds a small - // amount of overhead to each access. Given that the `SymbolTable` is on the hot - // path and that a fix is inbound, I think this use of `unsafe` is warranted. - // SAFETY: At this point, the only thing that's holding potentially holding references to - // the symbol table is the lazy value that represented an LST directive. We've - // already read through that value in full to populate the `PendingLst`. Updating - // the symbol table will invalidate data in that lazy value, so we just have to take - // care not to read from it after updating the symbol table. - let symbol_table = unsafe { - let mut_ptr = ptr as *mut SymbolTable; - &mut *mut_ptr - }; - // `is_empty()` will be true if the last item was not a symbol table OR if it was a symbol - // table but did not define new symbols. In either case, there's nothing for us to do. - if pending_lst.symbols.is_empty() { - return; - } - + fn apply_pending_lst(symbol_table: &mut SymbolTable, pending_lst: &mut PendingLst) { // If the symbol table's `imports` field had a value of `$ion_symbol_table`, then we're // appending the symbols it defined to the end of our existing local symbol table. // Otherwise, we need to clear the existing table before appending the new symbols. - if !pending_lst.is_lst_append { + if pending_lst.is_lst_append { + if pending_lst.symbols.is_empty() { + return; + } + } else { // We're setting the symbols list, not appending to it. symbol_table.reset(); } @@ -287,9 +176,9 @@ impl<'data, D: LazyDecoder<'data>> LazySystemReader<'data, D> { // Traverses a symbol table, processing the `symbols` and `imports` fields as needed to // populate the `PendingLst`. - fn process_symbol_table<'top>( + pub(crate) fn process_symbol_table( pending_lst: &mut PendingLst, - symbol_table: &LazyExpandedValue<'top, 'data, D>, + symbol_table: &LazyExpandedValue<'_, D>, ) -> IonResult<()> { // We've already confirmed this is an annotated struct let symbol_table = symbol_table.read()?.expect_struct()?; @@ -323,9 +212,9 @@ impl<'data, D: LazyDecoder<'data>> LazySystemReader<'data, D> { } // Store any strings defined in the `symbols` field in the `PendingLst` for future application. - fn process_symbols<'top>( + fn process_symbols( pending_lst: &mut PendingLst, - symbols: &LazyExpandedValue<'top, 'data, D>, + symbols: &LazyExpandedValue<'_, D>, ) -> IonResult<()> { if let ExpandedValueRef::List(list) = symbols.read()? { for symbol_text_result in list.iter() { @@ -341,9 +230,9 @@ impl<'data, D: LazyDecoder<'data>> LazySystemReader<'data, D> { } // Check for `imports: $ion_symbol_table`. - fn process_imports<'top>( + fn process_imports( pending_lst: &mut PendingLst, - imports: &LazyExpandedValue<'top, 'data, D>, + imports: &LazyExpandedValue<'_, D>, ) -> IonResult<()> { match imports.read()? { ExpandedValueRef::Symbol(symbol_ref) => { diff --git a/src/lazy/system_stream_item.rs b/src/lazy/system_stream_item.rs index 8d8be4ba..23c2ac22 100644 --- a/src/lazy/system_stream_item.rs +++ b/src/lazy/system_stream_item.rs @@ -6,19 +6,20 @@ use crate::{IonError, IonResult}; use std::fmt::{Debug, Formatter}; /// System stream elements that a SystemReader may encounter. -pub enum SystemStreamItem<'top, 'data, D: LazyDecoder<'data>> { +#[non_exhaustive] +pub enum SystemStreamItem<'top, D: LazyDecoder> { /// An Ion Version Marker (IVM) indicating the Ion major and minor version that were used to /// encode the values that follow. VersionMarker(u8, u8), /// An Ion symbol table encoded as a struct annotated with `$ion_symbol_table`. - SymbolTable(LazyStruct<'top, 'data, D>), + SymbolTable(LazyStruct<'top, D>), /// An application-level Ion value - Value(LazyValue<'top, 'data, D>), + Value(LazyValue<'top, D>), /// The end of the stream EndOfStream, } -impl<'top, 'data, D: LazyDecoder<'data>> Debug for SystemStreamItem<'top, 'data, D> { +impl<'top, D: LazyDecoder> Debug for SystemStreamItem<'top, D> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { SystemStreamItem::VersionMarker(major, minor) => { @@ -31,7 +32,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> Debug for SystemStreamItem<'top, 'data, } } -impl<'top, 'data, D: LazyDecoder<'data>> SystemStreamItem<'top, 'data, D> { +impl<'top, D: LazyDecoder> SystemStreamItem<'top, D> { /// If this item is an Ion version marker (IVM), returns `Some((major, minor))` indicating the /// version. Otherwise, returns `None`. pub fn version_marker(&self) -> Option<(u8, u8)> { @@ -51,7 +52,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> SystemStreamItem<'top, 'data, D> { /// If this item is a application-level value, returns `Some(&LazyValue)`. Otherwise, /// returns `None`. - pub fn value(&self) -> Option<&LazyValue<'top, 'data, D>> { + pub fn value(&self) -> Option<&LazyValue<'top, D>> { if let Self::Value(value) = self { Some(value) } else { @@ -61,7 +62,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> SystemStreamItem<'top, 'data, D> { /// Like [`Self::value`], but returns a [`IonError::Decoding`] if this item is not /// an application-level value. - pub fn expect_value(self) -> IonResult> { + pub fn expect_value(self) -> IonResult> { if let Self::Value(value) = self { Ok(value) } else { diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index ff809b67..f3c582d2 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -4,6 +4,7 @@ use std::ops::{Range, RangeFrom, RangeTo}; use std::slice::Iter; use std::str::FromStr; +use bumpalo::Bump as BumpAllocator; use nom::branch::alt; use nom::bytes::complete::{ is_a as complete_is_a, is_not as complete_is_not, tag as complete_tag, @@ -22,21 +23,23 @@ use nom::{AsBytes, CompareResult, IResult, InputLength, InputTake, Needed, Parse use crate::lazy::decoder::private::LazyRawValuePrivate; use crate::lazy::decoder::{LazyRawFieldExpr, LazyRawValueExpr, RawFieldExpr, RawValueExpr}; -use crate::lazy::encoding::{TextEncoding_1_0, TextEncoding_1_1}; -use crate::lazy::raw_stream_item::RawStreamItem; +use crate::lazy::encoding::{TextEncoding, TextEncoding_1_0, TextEncoding_1_1}; +use crate::lazy::never::Never; +use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; use crate::lazy::text::encoded_value::EncodedTextValue; use crate::lazy::text::matched::{ - MatchedBlob, MatchedClob, MatchedDecimal, MatchedFieldName, MatchedFloat, - MatchedHoursAndMinutes, MatchedInt, MatchedString, MatchedSymbol, MatchedTimestamp, - MatchedTimestampOffset, MatchedValue, + MatchedBlob, MatchedClob, MatchedDecimal, MatchedFieldName, MatchedFieldNameSyntax, + MatchedFloat, MatchedHoursAndMinutes, MatchedInt, MatchedString, MatchedSymbol, + MatchedTimestamp, MatchedTimestampOffset, MatchedValue, }; use crate::lazy::text::parse_result::{InvalidInputError, IonParseError}; use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult}; use crate::lazy::text::raw::r#struct::{LazyRawTextField_1_0, RawTextStructIterator_1_0}; use crate::lazy::text::raw::sequence::{RawTextListIterator_1_0, RawTextSExpIterator_1_0}; use crate::lazy::text::raw::v1_1::reader::{ - EncodedTextMacroInvocation, MacroIdRef, RawTextListIterator_1_1, RawTextMacroInvocation, - RawTextSExpIterator_1_1, RawTextStructIterator_1_1, + EncodedTextMacroInvocation, MacroIdRef, RawTextEExpression_1_1, RawTextListIterator_1_1, + RawTextSExpIterator_1_1, RawTextStructIterator_1_1, TextListSpanFinder_1_1, + TextSExpSpanFinder_1_1, TextStructSpanFinder_1_1, }; use crate::lazy::text::value::{LazyRawTextValue_1_0, LazyRawTextValue_1_1, MatchedRawTextValue}; use crate::result::DecodingError; @@ -106,8 +109,8 @@ fn dbg_parse>( /// pair. The `match` may be either the slice of the input that was matched (represented as another /// `TextBufferView`) or a `MatchedValue` that retains information discovered during parsing that /// will be useful if the match is later fully materialized into a value. -#[derive(PartialEq, Clone, Copy)] -pub(crate) struct TextBufferView<'a> { +#[derive(Clone, Copy)] +pub(crate) struct TextBufferView<'top> { // `data` is a slice of remaining data in the larger input stream. // `offset` is the absolute position in the overall input stream where that slice begins. // @@ -115,23 +118,45 @@ pub(crate) struct TextBufferView<'a> { // └────┬────┘ // data: &[u8] // offset: 6 - data: &'a [u8], + data: &'top [u8], offset: usize, + allocator: &'top BumpAllocator, } -impl<'data> TextBufferView<'data> { +impl<'a> PartialEq for TextBufferView<'a> { + fn eq(&self, other: &Self) -> bool { + self.offset == other.offset && self.data == other.data + } +} + +impl<'top> TextBufferView<'top> { /// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to zero. #[inline] - pub fn new(data: &[u8]) -> TextBufferView { - Self::new_with_offset(data, 0) + pub fn new(allocator: &'top BumpAllocator, data: &'top [u8]) -> TextBufferView<'top> { + Self::new_with_offset(allocator, data, 0) } /// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to the /// specified value. This is useful when `data` is a slice from the middle of a larger stream. /// Note that `offset` is the index of the larger stream at which `data` begins and not an /// offset _into_ `data`. - pub fn new_with_offset(data: &[u8], offset: usize) -> TextBufferView { - TextBufferView { data, offset } + pub fn new_with_offset( + allocator: &'top BumpAllocator, + data: &'top [u8], + offset: usize, + ) -> TextBufferView<'top> { + TextBufferView { + allocator, + data, + offset, + } + } + + pub fn local_lifespan<'a>(self) -> TextBufferView<'a> + where + 'top: 'a, + { + self.slice_to_end(0) } /// Returns a subslice of the [`TextBufferView`] that starts at `offset` and continues for @@ -139,10 +164,11 @@ impl<'data> TextBufferView<'data> { /// /// Note that `offset` is relative to the beginning of the buffer, not the beginning of the /// larger stream of which the buffer is a piece. - pub fn slice(&self, offset: usize, length: usize) -> TextBufferView<'data> { + pub fn slice(&self, offset: usize, length: usize) -> TextBufferView<'top> { TextBufferView { data: &self.data[offset..offset + length], offset: self.offset + offset, + allocator: self.allocator, } } @@ -151,10 +177,11 @@ impl<'data> TextBufferView<'data> { /// /// Note that `offset` is relative to the beginning of the buffer, not the beginning of the /// larger stream of which the buffer is a piece. - pub fn slice_to_end(&self, offset: usize) -> TextBufferView<'data> { + pub fn slice_to_end(&self, offset: usize) -> TextBufferView<'top> { TextBufferView { data: &self.data[offset..], offset: self.offset + offset, + allocator: self.allocator, } } @@ -180,7 +207,7 @@ impl<'data> TextBufferView<'data> { } /// Attempts to view the contents of the buffer as a UTF-8 `&str`. - pub fn as_text<'a>(&'a self) -> IonResult<&'data str> { + pub fn as_text<'a>(&'a self) -> IonResult<&'top str> { // On its surface, this method very closely resembles the `AsUtf8` trait's method. // However, this one returns a `&'data str` instead of a `&'a str`, which is to say // that the string that's returned lives as long as the data itself, not just the duration @@ -192,20 +219,20 @@ impl<'data> TextBufferView<'data> { }) } - pub fn match_whitespace(self) -> IonMatchResult<'data> { + pub fn match_whitespace(self) -> IonMatchResult<'top> { complete_is_a(WHITESPACE_CHARACTERS_AS_STR)(self) } /// Always succeeds and consumes none of the input. Returns an empty slice of the buffer. // This method is useful for parsers that need to match an optional construct but don't want // to return an Option<_>. For an example, see its use in `match_optional_whitespace`. - fn match_nothing(self) -> IonMatchResult<'data> { + fn match_nothing(self) -> IonMatchResult<'top> { // Use nom's `success` parser to return an empty slice from the head position success(self.slice(0, 0))(self) } /// Matches zero or more whitespace characters. - pub fn match_optional_whitespace(self) -> IonMatchResult<'data> { + pub fn match_optional_whitespace(self) -> IonMatchResult<'top> { // Either match whitespace and return what follows or just return the input as-is. // This will always return `Ok`, but it is packaged as an IonMatchResult for compatability // with other parsers. @@ -213,7 +240,7 @@ impl<'data> TextBufferView<'data> { } /// Matches any amount of contiguous comments and whitespace, including none. - pub fn match_optional_comments_and_whitespace(self) -> IonMatchResult<'data> { + pub fn match_optional_comments_and_whitespace(self) -> IonMatchResult<'top> { recognize(many0_count(alt(( Self::match_whitespace, Self::match_comment, @@ -226,7 +253,7 @@ impl<'data> TextBufferView<'data> { /// /* multi /// line */ /// comment - pub fn match_comment(self) -> IonMatchResult<'data> { + pub fn match_comment(self) -> IonMatchResult<'top> { alt(( Self::match_rest_of_line_comment, Self::match_multiline_comment, @@ -234,7 +261,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a single rest-of-the-line comment. - fn match_rest_of_line_comment(self) -> IonMatchResult<'data> { + fn match_rest_of_line_comment(self) -> IonMatchResult<'top> { preceded( // Matches a leading "//"... complete_tag("//"), @@ -250,7 +277,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a single multiline comment. - fn match_multiline_comment(self) -> IonMatchResult<'data> { + fn match_multiline_comment(self) -> IonMatchResult<'top> { recognize(delimited( // Matches a leading "/*"... complete_tag("/*"), @@ -262,7 +289,7 @@ impl<'data> TextBufferView<'data> { } /// Matches an Ion version marker (e.g. `$ion_1_0` or `$ion_1_1`.) - pub fn match_ivm(self) -> IonParseResult<'data, (u8, u8)> { + pub fn match_ivm(self) -> IonParseResult<'top, (u8, u8)> { let (remaining, (major, minor)) = terminated( preceded( complete_tag("$ion_"), @@ -290,12 +317,12 @@ impl<'data> TextBufferView<'data> { } /// Matches one or more annotations. - pub fn match_annotations(self) -> IonMatchResult<'data> { + pub fn match_annotations(self) -> IonMatchResult<'top> { recognize(many1_count(Self::match_annotation))(self) } /// Matches an annotation (symbol token) and a terminating '::'. - pub fn match_annotation(self) -> IonParseResult<'data, (MatchedSymbol, Range)> { + pub fn match_annotation(self) -> IonParseResult<'top, (MatchedSymbol, Range)> { terminated( whitespace_and_then(match_and_span(Self::match_symbol)), whitespace_and_then(complete_tag("::")), @@ -303,7 +330,9 @@ impl<'data> TextBufferView<'data> { } /// Matches an optional annotations sequence and a value, including operators. - pub fn match_sexp_value(self) -> IonParseResult<'data, Option>> { + pub fn match_sexp_value( + self, + ) -> IonParseResult<'top, Option>> { whitespace_and_then(alt(( value(None, tag(")")), pair( @@ -333,7 +362,7 @@ impl<'data> TextBufferView<'data> { /// * An optional annotations sequence and a value pub fn match_sexp_value_1_1( self, - ) -> IonParseResult<'data, Option>> { + ) -> IonParseResult<'top, Option>> { whitespace_and_then(alt(( Self::match_e_expression.map(|matched| Some(RawValueExpr::MacroInvocation(matched))), value(None, tag(")")), @@ -366,7 +395,7 @@ impl<'data> TextBufferView<'data> { /// /// If a pair is found, returns `Some(field)` and consumes the following comma if present. /// If no pair is found (that is: the end of the struct is next), returns `None`. - pub fn match_struct_field(self) -> IonParseResult<'data, Option>> { + pub fn match_struct_field(self) -> IonParseResult<'top, Option>> { // A struct field can have leading whitespace, but we want the buffer slice that we match // to begin with the field name. Here we skip any whitespace so we have another named // slice (`input_including_field_name`) with that property. @@ -375,27 +404,25 @@ impl<'data> TextBufferView<'data> { // If the next thing in the input is a `}`, return `None`. value(None, Self::match_struct_end), // Otherwise, match a name/value pair and turn it into a `LazyRawTextField`. - Self::match_struct_field_name_and_value.map( - move |((name_syntax, name_span), mut value)| { - // Add the field name offsets to the `EncodedTextValue` - value.encoded_value = value.encoded_value.with_field_name( - name_syntax, - name_span.start, - name_span.len(), - ); - // Replace the value's buffer slice (which starts with the value itself) with the - // buffer slice we created that begins with the field name. - value.input = input_including_field_name; - Some(LazyRawTextField_1_0 { - value: value.into(), - }) - }, - ), + Self::match_struct_field_name_and_value.map(move |(matched_field_name, mut value)| { + // Add the field name offsets to the `EncodedTextValue` + value.encoded_value = value.encoded_value.with_field_name( + matched_field_name.syntax(), + matched_field_name.span().start, + matched_field_name.span().len(), + ); + // Replace the value's buffer slice (which starts with the value itself) with the + // buffer slice we created that begins with the field name. + value.input = input_including_field_name; + Some(LazyRawTextField_1_0 { + value: value.into(), + }) + }), ))(input_including_field_name) } /// Matches any amount of whitespace followed by a closing `}`. - fn match_struct_end(self) -> IonMatchResult<'data> { + fn match_struct_end(self) -> IonMatchResult<'top> { whitespace_and_then(peek(tag("}"))).parse(self) } @@ -403,10 +430,16 @@ impl<'data> TextBufferView<'data> { /// input bytes where the field name is found, and the value. pub fn match_struct_field_name_and_value( self, - ) -> IonParseResult<'data, ((MatchedFieldName, Range), MatchedRawTextValue<'data>)> { + ) -> IonParseResult< + 'top, + ( + MatchedFieldName, + MatchedRawTextValue<'top, TextEncoding_1_0>, + ), + > { terminated( separated_pair( - whitespace_and_then(match_and_span(Self::match_struct_field_name)), + whitespace_and_then(Self::match_struct_field_name), whitespace_and_then(tag(":")), whitespace_and_then(Self::match_annotated_value), ), @@ -420,7 +453,7 @@ impl<'data> TextBufferView<'data> { /// If no pair is found (that is: the end of the struct is next), returns `None`. pub fn match_struct_field_1_1( self, - ) -> IonParseResult<'data, Option>> { + ) -> IonParseResult<'top, Option>> { // A struct field can have leading whitespace, but we want the buffer slice that we match // to begin with the field name. Here we skip any whitespace so we have another named // slice (`input_including_field_name`) with that property. @@ -433,55 +466,53 @@ impl<'data> TextBufferView<'data> { whitespace_and_then(alt((tag(","), peek(tag("}"))))), ) .map(|invocation| Ok(Some(RawFieldExpr::MacroInvocation(invocation)))), - Self::match_struct_field_name_and_e_expression_1_1.map( - |((matched_name, name_span), invocation)| { - // TODO: We're discarding the name encoding information here. When we revise our field name - // storage strategy[1], we should make sure to capture this for tooling's sake. - // [1]: https://github.com/amazon-ion/ion-rust/issues/631 - let name_bytes = self.slice(name_span.start - self.offset(), name_span.len()); - let name = match matched_name.read(name_bytes) { - Ok(name) => name, - Err(e) => { - let error = - InvalidInputError::new(name_bytes).with_description(format!( - "failed to read field name associated with e-expression: {e:?}" - )); - return Err(nom::Err::Error(IonParseError::Invalid(error))); - } - }; - Ok(Some(RawFieldExpr::NameValuePair( - name, - RawValueExpr::MacroInvocation(invocation), - ))) - }, - ), + Self::match_struct_field_name_and_e_expression_1_1.map(|(field_name, invocation)| { + // TODO: We're discarding the name encoding information here. When we revise our field name + // storage strategy[1], we should make sure to capture this for tooling's sake. + // [1]: https://github.com/amazon-ion/ion-rust/issues/631 + let name_bytes = self.slice( + field_name.span().start - self.offset(), + field_name.span().len(), + ); + let name = match field_name.read(name_bytes) { + Ok(name) => name, + Err(e) => { + let error = InvalidInputError::new(name_bytes).with_description(format!( + "failed to read field name associated with e-expression: {e:?}" + )); + return Err(nom::Err::Error(IonParseError::Invalid(error))); + } + }; + Ok(Some(RawFieldExpr::NameValuePair( + name, + RawValueExpr::MacroInvocation(invocation), + ))) + }), // Otherwise, match a name/value pair and turn it into a `LazyRawTextField`. - Self::match_struct_field_name_and_value_1_1.map( - move |((name_syntax, name_span), mut value)| { - // Add the field name offsets to the `EncodedTextValue` - value.encoded_value = value.encoded_value.with_field_name( - name_syntax, - name_span.start, - name_span.len(), - ); - // Replace the value's buffer slice (which starts with the value itself) with the - // buffer slice we created that begins with the field name. - value.input = input_including_field_name; - let field_name = match value.field_name() { - Ok(name) => name, - Err(e) => { - let error = InvalidInputError::new(self) - .with_description(format!("failed to struct field name: {e:?}")); - return Err(nom::Err::Error(IonParseError::Invalid(error))); - } - }; - let field_value = LazyRawTextValue_1_1::new(value); - Ok(Some(RawFieldExpr::NameValuePair( - field_name, - RawValueExpr::ValueLiteral(field_value), - ))) - }, - ), + Self::match_struct_field_name_and_value_1_1.map(move |(field_name, mut value)| { + // Add the field name offsets to the `EncodedTextValue` + value.encoded_value = value.encoded_value.with_field_name( + field_name.syntax(), + field_name.span().start, + field_name.span().len(), + ); + // Replace the value's buffer slice (which starts with the value itself) with the + // buffer slice we created that begins with the field name. + value.input = input_including_field_name; + let field_name = match value.field_name() { + Ok(name) => name, + Err(e) => { + let error = InvalidInputError::new(self) + .with_description(format!("failed to struct field name: {e:?}")); + return Err(nom::Err::Error(IonParseError::Invalid(error))); + } + }; + let field_value = LazyRawTextValue_1_1::new(value); + Ok(Some(RawFieldExpr::NameValuePair( + field_name, + RawValueExpr::ValueLiteral(field_value), + ))) + }), ))(input_including_field_name)?; Ok((input_after_field, field_expr_result?)) } @@ -491,16 +522,10 @@ impl<'data> TextBufferView<'data> { /// range of input bytes where the field name is found, and the value. pub fn match_struct_field_name_and_e_expression_1_1( self, - ) -> IonParseResult< - 'data, - ( - (MatchedFieldName, Range), - RawTextMacroInvocation<'data>, - ), - > { + ) -> IonParseResult<'top, (MatchedFieldName, RawTextEExpression_1_1<'top>)> { terminated( separated_pair( - whitespace_and_then(match_and_span(Self::match_struct_field_name)), + whitespace_and_then(Self::match_struct_field_name), whitespace_and_then(tag(":")), whitespace_and_then(Self::match_e_expression), ), @@ -513,10 +538,16 @@ impl<'data> TextBufferView<'data> { /// range of input bytes where the field name is found, and the value. pub fn match_struct_field_name_and_value_1_1( self, - ) -> IonParseResult<'data, ((MatchedFieldName, Range), MatchedRawTextValue<'data>)> { + ) -> IonParseResult< + 'top, + ( + MatchedFieldName, + MatchedRawTextValue<'top, TextEncoding_1_1>, + ), + > { terminated( separated_pair( - whitespace_and_then(match_and_span(Self::match_struct_field_name)), + whitespace_and_then(Self::match_struct_field_name), whitespace_and_then(tag(":")), whitespace_and_then(Self::match_annotated_value_1_1), ), @@ -525,7 +556,9 @@ impl<'data> TextBufferView<'data> { } /// Matches an optional annotation sequence and a trailing value. - pub fn match_annotated_value(self) -> IonParseResult<'data, MatchedRawTextValue<'data>> { + pub fn match_annotated_value( + self, + ) -> IonParseResult<'top, MatchedRawTextValue<'top, TextEncoding_1_0>> { pair( opt(Self::match_annotations), whitespace_and_then(Self::match_value), @@ -544,7 +577,9 @@ impl<'data> TextBufferView<'data> { } /// Matches an optional annotation sequence and a trailing v1.1 value. - pub fn match_annotated_value_1_1(self) -> IonParseResult<'data, MatchedRawTextValue<'data>> { + pub fn match_annotated_value_1_1( + self, + ) -> IonParseResult<'top, MatchedRawTextValue<'top, TextEncoding_1_1>> { pair( opt(Self::match_annotations), whitespace_and_then(Self::match_value_1_1), @@ -567,17 +602,19 @@ impl<'data> TextBufferView<'data> { /// * An identifier /// * A symbol ID /// * A short-form string - pub fn match_struct_field_name(self) -> IonParseResult<'data, MatchedFieldName> { - alt(( - Self::match_string.map(MatchedFieldName::String), - Self::match_symbol.map(MatchedFieldName::Symbol), - ))(self) + pub fn match_struct_field_name(self) -> IonParseResult<'top, MatchedFieldName> { + match_and_span(alt(( + Self::match_string.map(MatchedFieldNameSyntax::String), + Self::match_symbol.map(MatchedFieldNameSyntax::Symbol), + ))) + .map(|(syntax, span)| MatchedFieldName::new(syntax, span)) + .parse(self) } /// Matches a single top-level value, an IVM, or the end of the stream. pub fn match_top_level_item_1_0( self, - ) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding_1_0>> { + ) -> IonParseResult<'top, RawStreamItem, Never>> { // If only whitespace/comments remain, we're at the end of the stream. let (input_after_ws, _ws) = self.match_optional_comments_and_whitespace()?; if input_after_ws.is_empty() { @@ -597,7 +634,7 @@ impl<'data> TextBufferView<'data> { /// the stream. pub fn match_top_level_item_1_1( self, - ) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding_1_1>> { + ) -> IonParseResult<'top, LazyRawStreamItem<'top, TextEncoding_1_1>> { // If only whitespace/comments remain, we're at the end of the stream. let (input_after_ws, _ws) = self.match_optional_comments_and_whitespace()?; if input_after_ws.is_empty() { @@ -615,7 +652,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a single scalar value or the beginning of a container. - pub fn match_value(self) -> IonParseResult<'data, MatchedRawTextValue<'data>> { + pub fn match_value(self) -> IonParseResult<'top, MatchedRawTextValue<'top, TextEncoding_1_0>> { alt(( // For `null` and `bool`, we use `read_` instead of `match_` because there's no additional // parsing to be done. @@ -694,19 +731,40 @@ impl<'data> TextBufferView<'data> { map( match_and_length(Self::match_list), |(matched_list, length)| { - EncodedTextValue::new(MatchedValue::List, matched_list.offset(), length) + // TODO: Cache child expressions found in 1.0 list + let not_yet_used_in_1_0 = + bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); + EncodedTextValue::new( + MatchedValue::List(not_yet_used_in_1_0), + matched_list.offset(), + length, + ) }, ), map( match_and_length(Self::match_sexp), |(matched_list, length)| { - EncodedTextValue::new(MatchedValue::SExp, matched_list.offset(), length) + // TODO: Cache child expressions found in 1.0 sexp + let not_yet_used_in_1_0 = + bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); + EncodedTextValue::new( + MatchedValue::SExp(not_yet_used_in_1_0), + matched_list.offset(), + length, + ) }, ), map( match_and_length(Self::match_struct), |(matched_struct, length)| { - EncodedTextValue::new(MatchedValue::Struct, matched_struct.offset(), length) + // TODO: Cache child expressions found in 1.0 struct + let not_yet_used_in_1_0 = + bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); + EncodedTextValue::new( + MatchedValue::Struct(not_yet_used_in_1_0), + matched_struct.offset(), + length, + ) }, ), )) @@ -717,7 +775,9 @@ impl<'data> TextBufferView<'data> { .parse(self) } - pub fn match_value_1_1(self) -> IonParseResult<'data, MatchedRawTextValue<'data>> { + pub fn match_value_1_1( + self, + ) -> IonParseResult<'top, MatchedRawTextValue<'top, TextEncoding_1_1>> { alt(( // For `null` and `bool`, we use `read_` instead of `match_` because there's no additional // parsing to be done. @@ -795,20 +855,32 @@ impl<'data> TextBufferView<'data> { ), map( match_and_length(Self::match_list_1_1), - |(matched_list, length)| { - EncodedTextValue::new(MatchedValue::List, matched_list.offset(), length) + |((matched_list, child_expr_cache), length)| { + EncodedTextValue::new( + MatchedValue::List(child_expr_cache), + matched_list.offset(), + length, + ) }, ), map( match_and_length(Self::match_sexp_1_1), - |(matched_list, length)| { - EncodedTextValue::new(MatchedValue::SExp, matched_list.offset(), length) + |((matched_sexp, child_expr_cache), length)| { + EncodedTextValue::new( + MatchedValue::SExp(child_expr_cache), + matched_sexp.offset(), + length, + ) }, ), map( match_and_length(Self::match_struct_1_1), - |(matched_struct, length)| { - EncodedTextValue::new(MatchedValue::Struct, matched_struct.offset(), length) + |((matched_struct, field_expr_cache), length)| { + EncodedTextValue::new( + MatchedValue::Struct(field_expr_cache), + matched_struct.offset(), + length, + ) }, ), )) @@ -822,7 +894,7 @@ impl<'data> TextBufferView<'data> { /// Matches a list. /// /// If the input does not contain the entire list, returns `IonError::Incomplete(_)`. - pub fn match_list(self) -> IonMatchResult<'data> { + pub fn match_list(self) -> IonMatchResult<'top> { // If it doesn't start with [, it isn't a list. if self.bytes().first() != Some(&b'[') { let error = InvalidInputError::new(self); @@ -858,7 +930,15 @@ impl<'data> TextBufferView<'data> { /// /// If the input does not contain the entire list, returns `IonError::Incomplete(_)`. // TODO: DRY with `match_list` - pub fn match_list_1_1(self) -> IonMatchResult<'data> { + pub fn match_list_1_1( + self, + ) -> IonParseResult< + 'top, + ( + TextBufferView<'top>, + &'top [LazyRawValueExpr<'top, TextEncoding_1_1>], + ), + > { // If it doesn't start with [, it isn't a list. if self.bytes().first() != Some(&b'[') { let error = InvalidInputError::new(self); @@ -867,30 +947,39 @@ impl<'data> TextBufferView<'data> { // Scan ahead to find the end of this list. let list_body = self.slice_to_end(1); let sequence_iter = RawTextListIterator_1_1::new(list_body); - let span = match sequence_iter.find_span() { - Ok(span) => span, - // If the complete container isn't available, return an incomplete. - Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), - // If invalid syntax was encountered, return a failure to prevent nom from trying - // other parser kinds. - Err(e) => { - return { - let error = InvalidInputError::new(self) - .with_label("matching a v1.1 list") - .with_description(format!("{}", e)); - Err(nom::Err::Failure(IonParseError::Invalid(error))) + let (span, child_exprs) = + match TextListSpanFinder_1_1::new(self.allocator, sequence_iter).find_span() { + Ok((span, child_exprs)) => (span, child_exprs), + // If the complete container isn't available, return an incomplete. + Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), + // If invalid syntax was encountered, return a failure to prevent nom from trying + // other parser kinds. + Err(e) => { + return { + let error = InvalidInputError::new(self) + .with_label("matching a v1.1 list") + .with_description(format!("{}", e)); + Err(nom::Err::Failure(IonParseError::Invalid(error))) + } } - } - }; + }; // For the matched span, we use `self` again to include the opening `[` let matched = self.slice(0, span.len()); let remaining = self.slice_to_end(span.len()); - Ok((remaining, matched)) + Ok((remaining, (matched, child_exprs))) } // TODO: DRY with `match_sexp` - pub fn match_sexp_1_1(self) -> IonMatchResult<'data> { + pub fn match_sexp_1_1( + self, + ) -> IonParseResult< + 'top, + ( + TextBufferView<'top>, + &'top [LazyRawValueExpr<'top, TextEncoding_1_1>], + ), + > { if self.bytes().first() != Some(&b'(') { let error = InvalidInputError::new(self); return Err(nom::Err::Error(IonParseError::Invalid(error))); @@ -898,25 +987,26 @@ impl<'data> TextBufferView<'data> { // Scan ahead to find the end of this sexp let sexp_body = self.slice_to_end(1); let sexp_iter = RawTextSExpIterator_1_1::new(sexp_body); - let span = match sexp_iter.find_span(1) { - Ok(span) => span, - // If the complete container isn't available, return an incomplete. - Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), - // If invalid syntax was encountered, return a failure to prevent nom from trying - // other parser kinds. - Err(e) => { - return { - let error = InvalidInputError::new(self) - .with_label("matching a sexp") - .with_description(format!("{}", e)); - Err(nom::Err::Failure(IonParseError::Invalid(error))) + let (span, child_expr_cache) = + match TextSExpSpanFinder_1_1::new(self.allocator, sexp_iter).find_span(1) { + Ok((span, child_expr_cache)) => (span, child_expr_cache), + // If the complete container isn't available, return an incomplete. + Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), + // If invalid syntax was encountered, return a failure to prevent nom from trying + // other parser kinds. + Err(e) => { + return { + let error = InvalidInputError::new(self) + .with_label("matching a sexp") + .with_description(format!("{}", e)); + Err(nom::Err::Failure(IonParseError::Invalid(error))) + } } - } - }; + }; // For the matched span, we use `self` again to include the opening `(` let matched = self.slice(0, span.len()); let remaining = self.slice_to_end(span.len()); - Ok((remaining, matched)) + Ok((remaining, (matched, child_expr_cache))) } /// Matches a single value in a list OR the end of the list, allowing for leading whitespace @@ -924,7 +1014,9 @@ impl<'data> TextBufferView<'data> { /// /// If a value is found, returns `Ok(Some(value))`. If the end of the list is found, returns /// `Ok(None)`. - pub fn match_list_value(self) -> IonParseResult<'data, Option>> { + pub fn match_list_value( + self, + ) -> IonParseResult<'top, Option>> { preceded( // Some amount of whitespace/comments... Self::match_optional_comments_and_whitespace, @@ -946,7 +1038,7 @@ impl<'data> TextBufferView<'data> { /// * An optional annotations sequence and a value pub fn match_list_value_1_1( self, - ) -> IonParseResult<'data, Option>> { + ) -> IonParseResult<'top, Option>> { whitespace_and_then(alt(( terminated( Self::match_e_expression, @@ -968,7 +1060,7 @@ impl<'data> TextBufferView<'data> { /// Matches syntax that is expected to follow a value in a list: any amount of whitespace and/or /// comments followed by either a comma (consumed) or an end-of-list `]` (not consumed). - fn match_delimiter_after_list_value(self) -> IonMatchResult<'data> { + fn match_delimiter_after_list_value(self) -> IonMatchResult<'top> { preceded( Self::match_optional_comments_and_whitespace, alt((tag(","), peek(tag("]")))), @@ -978,7 +1070,7 @@ impl<'data> TextBufferView<'data> { /// Matches an s-expression (sexp). /// /// If the input does not contain the entire s-expression, returns `IonError::Incomplete(_)`. - pub fn match_sexp(self) -> IonMatchResult<'data> { + pub fn match_sexp(self) -> IonMatchResult<'top> { if self.bytes().first() != Some(&b'(') { let error = InvalidInputError::new(self); return Err(nom::Err::Error(IonParseError::Invalid(error))); @@ -1010,7 +1102,7 @@ impl<'data> TextBufferView<'data> { /// Matches a struct. /// /// If the input does not contain the entire struct, returns `IonError::Incomplete(_)`. - pub fn match_struct(self) -> IonMatchResult<'data> { + pub fn match_struct(self) -> IonMatchResult<'top> { // If it doesn't start with {, it isn't a struct. if self.bytes().first() != Some(&b'{') { let error = InvalidInputError::new(self); @@ -1041,7 +1133,15 @@ impl<'data> TextBufferView<'data> { Ok((remaining, matched)) } - pub fn match_struct_1_1(self) -> IonMatchResult<'data> { + pub fn match_struct_1_1( + self, + ) -> IonParseResult< + 'top, + ( + TextBufferView<'top>, + &'top [LazyRawFieldExpr<'top, TextEncoding_1_1>], + ), + > { // If it doesn't start with {, it isn't a struct. if self.bytes().first() != Some(&b'{') { let error = InvalidInputError::new(self); @@ -1050,32 +1150,33 @@ impl<'data> TextBufferView<'data> { // Scan ahead to find the end of this struct. let struct_body = self.slice_to_end(1); let struct_iter = RawTextStructIterator_1_1::new(struct_body); - let span = match struct_iter.find_span() { - Ok(span) => span, - // If the complete container isn't available, return an incomplete. - Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), - // If invalid syntax was encountered, return a failure to prevent nom from trying - // other parser kinds. - Err(e) => { - return { - let error = InvalidInputError::new(self) - .with_label("matching a v1.1 struct") - .with_description(format!("{}", e)); - Err(nom::Err::Failure(IonParseError::Invalid(error))) + let (span, fields) = + match TextStructSpanFinder_1_1::new(self.allocator, struct_iter).find_span() { + Ok((span, fields)) => (span, fields), + // If the complete container isn't available, return an incomplete. + Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), + // If invalid syntax was encountered, return a failure to prevent nom from trying + // other parser kinds. + Err(e) => { + return { + let error = InvalidInputError::new(self) + .with_label("matching a v1.1 struct") + .with_description(format!("{}", e)); + Err(nom::Err::Failure(IonParseError::Invalid(error))) + } } - } - }; + }; // For the matched span, we use `self` again to include the opening `{` let matched = self.slice(0, span.len()); let remaining = self.slice_to_end(span.len()); - Ok((remaining, matched)) + Ok((remaining, (matched, fields))) } /// Matches an e-expression invoking a macro. /// /// If the input does not contain the entire e-expression, returns `IonError::Incomplete(_)`. - pub fn match_e_expression(self) -> IonParseResult<'data, RawTextMacroInvocation<'data>> { + pub fn match_e_expression(self) -> IonParseResult<'top, RawTextEExpression_1_1<'top>> { let (exp_body, _) = tag("(:")(self)?; // TODO: Support macro ID kinds besides unqualified names let (exp_body_after_id, (macro_id_bytes, _matched_symbol)) = @@ -1091,8 +1192,10 @@ impl<'data> TextBufferView<'data> { // we tell the iterator how many bytes comprised the head of the expression: two bytes // for `(:` plus the length of the macro ID. let initial_bytes_skipped = 2 + macro_id_bytes.len(); - let span = match sexp_iter.find_span(initial_bytes_skipped) { - Ok(span) => span, + let (span, child_expr_cache) = match TextSExpSpanFinder_1_1::new(self.allocator, sexp_iter) + .find_span(initial_bytes_skipped) + { + Ok((span, child_expr_cache)) => (span, child_expr_cache), // If the complete container isn't available, return an incomplete. Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), // If invalid syntax was encountered, return a failure to prevent nom from trying @@ -1112,17 +1215,18 @@ impl<'data> TextBufferView<'data> { // For the matched span, we use `self` again to include the opening `(:` let matched = self.slice(0, span.len()); let remaining = self.slice_to_end(span.len()); - let macro_invocation = RawTextMacroInvocation::new( + let macro_invocation = RawTextEExpression_1_1::new( macro_id, EncodedTextMacroInvocation::new(macro_id_bytes.len() as u16), matched, + child_expr_cache, ); Ok((remaining, macro_invocation)) } /// Matches and returns a boolean value. - pub fn match_bool(self) -> IonParseResult<'data, bool> { + pub fn match_bool(self) -> IonParseResult<'top, bool> { terminated( alt((value(true, tag("true")), value(false, tag("false")))), Self::peek_stop_character, @@ -1130,7 +1234,7 @@ impl<'data> TextBufferView<'data> { } /// Matches and returns any type of null. (`null`, `null.null`, `null.int`, etc) - pub fn match_null(self) -> IonParseResult<'data, IonType> { + pub fn match_null(self) -> IonParseResult<'top, IonType> { delimited( complete_tag("null"), opt(preceded(complete_char('.'), Self::match_ion_type)), @@ -1141,7 +1245,7 @@ impl<'data> TextBufferView<'data> { } /// Matches and returns an Ion type. - fn match_ion_type(self) -> IonParseResult<'data, IonType> { + fn match_ion_type(self) -> IonParseResult<'top, IonType> { alt(( value(IonType::Null, complete_tag("null")), value(IonType::Bool, complete_tag("bool")), @@ -1160,18 +1264,18 @@ impl<'data> TextBufferView<'data> { } /// Matches any one of Ion's stop characters. - fn match_stop_character(self) -> IonMatchResult<'data> { + fn match_stop_character(self) -> IonMatchResult<'top> { alt((eof, recognize(one_of("{}[](),\"' \t\n\r\u{0b}\u{0c}"))))(self) } /// Matches--but does not consume--any one of Ion's stop characters. - fn peek_stop_character(self) -> IonMatchResult<'data> { + fn peek_stop_character(self) -> IonMatchResult<'top> { peek(Self::match_stop_character).parse(self) } /// Matches the three parts of an int--its base, its sign, and its digits--without actually /// constructing an Int from them. - pub fn match_int(self) -> IonParseResult<'data, MatchedInt> { + pub fn match_int(self) -> IonParseResult<'top, MatchedInt> { terminated( // We test for base 16 and base 2 so the '0x' or '0b' isn't confused for a leading zero // in a base 10 number, which would be illegal. @@ -1186,7 +1290,7 @@ impl<'data> TextBufferView<'data> { /// Matches a base-2 notation integer (e.g. `0b0`, `0B1010`, or `-0b0111`) and returns the /// partially parsed value as a [`MatchedInt`]. - fn match_base_2_int(self) -> IonParseResult<'data, MatchedInt> { + fn match_base_2_int(self) -> IonParseResult<'top, MatchedInt> { separated_pair( opt(char('-')), alt((complete_tag("0b"), complete_tag("0B"))), @@ -1199,7 +1303,7 @@ impl<'data> TextBufferView<'data> { } /// Matches the digits of a base-2 integer. - fn match_base_2_int_digits(self) -> IonMatchResult<'data> { + fn match_base_2_int_digits(self) -> IonMatchResult<'top> { recognize(terminated( // Zero or more digits-followed-by-underscores many0_count(pair(complete_is_a("01"), complete_tag("_"))), @@ -1210,7 +1314,7 @@ impl<'data> TextBufferView<'data> { /// Matches a base-10 notation integer (e.g. `0`, `255`, or `-1_024`) and returns the partially /// parsed value as a [`MatchedInt`]. - fn match_base_10_int(self) -> IonParseResult<'data, MatchedInt> { + fn match_base_10_int(self) -> IonParseResult<'top, MatchedInt> { pair(opt(char('-')), Self::match_base_10_int_digits) .map(|(maybe_sign, digits)| { MatchedInt::new(10, maybe_sign.is_some(), digits.offset() - self.offset()) @@ -1219,14 +1323,14 @@ impl<'data> TextBufferView<'data> { } /// Matches the digits of a base-10 integer. (i.e. An integer without a sign.) - fn match_base_10_int_digits(self) -> IonMatchResult<'data> { + fn match_base_10_int_digits(self) -> IonMatchResult<'top> { Self::match_base_10_digits_before_dot(self) } /// Matches either: /// * a zero /// * a non-zero followed by some number of digits with optional underscores - fn match_base_10_digits_before_dot(self) -> IonMatchResult<'data> { + fn match_base_10_digits_before_dot(self) -> IonMatchResult<'top> { alt(( // The number is either a zero... complete_tag("0"), @@ -1239,20 +1343,20 @@ impl<'data> TextBufferView<'data> { } /// Matches the first digit of a multi-digit base-10 integer. (i.e. Any digit but zero.) - fn match_base_10_leading_digit(self) -> IonMatchResult<'data> { + fn match_base_10_leading_digit(self) -> IonMatchResult<'top> { recognize(one_of("123456789"))(self) } /// Matches any number of digits with underscores optionally appearing in the middle. /// This parser accepts leading zeros, which is why it cannot be used for the beginning /// of a number. - fn match_base_10_trailing_digits(self) -> IonMatchResult<'data> { + fn match_base_10_trailing_digits(self) -> IonMatchResult<'top> { recognize(many0_count(pair(opt(complete_char('_')), complete_digit1)))(self) } /// Matches a base-10 notation integer (e.g. `0x0`, `0X20`, or `-0xCAFE`) and returns the /// partially parsed value as a [`MatchedInt`]. - fn match_base_16_int(self) -> IonParseResult<'data, MatchedInt> { + fn match_base_16_int(self) -> IonParseResult<'top, MatchedInt> { separated_pair( opt(char('-')), alt((complete_tag("0x"), complete_tag("0X"))), @@ -1265,7 +1369,7 @@ impl<'data> TextBufferView<'data> { } /// Matches the digits that follow the '0x' or '0X' in a base-16 integer - fn match_base_16_int_trailing_digits(self) -> IonMatchResult<'data> { + fn match_base_16_int_trailing_digits(self) -> IonMatchResult<'top> { recognize(terminated( // Zero or more digits-followed-by-underscores many0_count(pair(Self::take_base_16_digits1, complete_tag("_"))), @@ -1276,14 +1380,14 @@ impl<'data> TextBufferView<'data> { /// Recognizes 1 or more consecutive base-16 digits. // This function's "1" suffix is a style borrowed from `nom`. - fn take_base_16_digits1(self) -> IonMatchResult<'data> { + fn take_base_16_digits1(self) -> IonMatchResult<'top> { complete_take_while1(|b: u8| b.is_ascii_hexdigit())(self) } /// Matches `n` consecutive hex digits. pub(crate) fn match_n_hex_digits( count: usize, - ) -> impl Parser, TextBufferView<'data>, IonParseError<'data>> { + ) -> impl Parser, TextBufferView<'top>, IonParseError<'top>> { // `fold_many_m_n` allows us to repeat the same parser between 'm' and 'n' times, // specifying an operation to perform on each match. In our case, we just need the parser // to run 'n' times exactly so `recognize` can return the accepted slice; our operation @@ -1299,7 +1403,7 @@ impl<'data> TextBufferView<'data> { } /// Matches an Ion float of any syntax - fn match_float(self) -> IonParseResult<'data, MatchedFloat> { + fn match_float(self) -> IonParseResult<'top, MatchedFloat> { terminated( alt(( Self::match_float_special_value, @@ -1310,7 +1414,7 @@ impl<'data> TextBufferView<'data> { } /// Matches special IEEE-754 values, including +/- infinity and NaN. - fn match_float_special_value(self) -> IonParseResult<'data, MatchedFloat> { + fn match_float_special_value(self) -> IonParseResult<'top, MatchedFloat> { alt(( value(MatchedFloat::NotANumber, complete_tag("nan")), value(MatchedFloat::PositiveInfinity, complete_tag("+inf")), @@ -1319,7 +1423,7 @@ impl<'data> TextBufferView<'data> { } /// Matches numeric IEEE-754 floating point values. - fn match_float_numeric_value(self) -> IonParseResult<'data, MatchedFloat> { + fn match_float_numeric_value(self) -> IonParseResult<'top, MatchedFloat> { recognize(pair( Self::match_number_with_optional_dot_and_digits, Self::match_float_exponent_marker_and_digits, @@ -1334,7 +1438,7 @@ impl<'data> TextBufferView<'data> { /// 1000 /// 1000.559 /// -25.2 - fn match_number_with_optional_dot_and_digits(self) -> IonMatchResult<'data> { + fn match_number_with_optional_dot_and_digits(self) -> IonMatchResult<'top> { recognize(tuple(( opt(complete_tag("-")), Self::match_base_10_digits_before_dot, @@ -1344,7 +1448,7 @@ impl<'data> TextBufferView<'data> { /// In a float or decimal, matches the digits that are permitted before the decimal point. /// This includes either a single zero, or a non-zero followed by any sequence of digits. - fn match_digits_before_dot(self) -> IonMatchResult<'data> { + fn match_digits_before_dot(self) -> IonMatchResult<'top> { alt(( complete_tag("0"), recognize(pair(Self::match_leading_digit, Self::match_trailing_digits)), @@ -1352,12 +1456,12 @@ impl<'data> TextBufferView<'data> { } /// Matches a single non-zero base 10 digit. - fn match_leading_digit(self) -> IonMatchResult<'data> { + fn match_leading_digit(self) -> IonMatchResult<'top> { recognize(one_of("123456789"))(self) } /// Matches any number of base 10 digits, allowing underscores at any position except the end. - fn match_trailing_digits(self) -> IonMatchResult<'data> { + fn match_trailing_digits(self) -> IonMatchResult<'top> { recognize(many0_count(preceded( opt(complete_char('_')), complete_digit1, @@ -1365,7 +1469,7 @@ impl<'data> TextBufferView<'data> { } /// Recognizes a decimal point followed by any number of base-10 digits. - fn match_dot_followed_by_base_10_digits(self) -> IonMatchResult<'data> { + fn match_dot_followed_by_base_10_digits(self) -> IonMatchResult<'top> { recognize(preceded( complete_tag("."), opt(Self::match_digits_after_dot), @@ -1373,7 +1477,7 @@ impl<'data> TextBufferView<'data> { } /// Like `match_digits_before_dot`, but allows leading zeros. - fn match_digits_after_dot(self) -> IonMatchResult<'data> { + fn match_digits_after_dot(self) -> IonMatchResult<'top> { recognize(terminated( // Zero or more digits-followed-by-underscores many0_count(pair(complete_digit1, complete_char('_'))), @@ -1384,7 +1488,7 @@ impl<'data> TextBufferView<'data> { /// Matches an `e` or `E` followed by an optional sign (`+` or `-`) followed by one or more /// base 10 digits. - fn match_float_exponent_marker_and_digits(self) -> IonMatchResult<'data> { + fn match_float_exponent_marker_and_digits(self) -> IonMatchResult<'top> { preceded( complete_one_of("eE"), recognize(Self::match_exponent_sign_and_digits), @@ -1400,7 +1504,7 @@ impl<'data> TextBufferView<'data> { /// /// Returns a boolean indicating whether the sign was negative (vs absent or positive) /// and the buffer slice containing the digits. - fn match_exponent_sign_and_digits(self) -> IonParseResult<'data, (bool, Self)> { + fn match_exponent_sign_and_digits(self) -> IonParseResult<'top, (bool, Self)> { pair( // Optional leading sign; if there's no sign, it's not negative. opt(Self::match_any_sign).map(|s| s == Some('-')), @@ -1411,17 +1515,17 @@ impl<'data> TextBufferView<'data> { /// Matches `-` OR `+`. /// /// This is used for matching exponent signs; most places in Ion do not allow `+`. - pub fn match_any_sign(self) -> IonParseResult<'data, char> { + pub fn match_any_sign(self) -> IonParseResult<'top, char> { complete_one_of("-+")(self) } - pub fn match_decimal_exponent(self) -> IonParseResult<'data, (bool, TextBufferView<'data>)> { + pub fn match_decimal_exponent(self) -> IonParseResult<'top, (bool, TextBufferView<'top>)> { preceded(complete_one_of("dD"), Self::match_exponent_sign_and_digits)(self) } /// Match an optional sign (if present), digits before the decimal point, then digits after the /// decimal point (if present). - pub fn match_decimal(self) -> IonParseResult<'data, MatchedDecimal> { + pub fn match_decimal(self) -> IonParseResult<'top, MatchedDecimal> { terminated( tuple(( opt(complete_tag("-")), @@ -1489,12 +1593,12 @@ impl<'data> TextBufferView<'data> { } /// Matches short- or long-form string. - pub fn match_string(self) -> IonParseResult<'data, MatchedString> { + pub fn match_string(self) -> IonParseResult<'top, MatchedString> { alt((Self::match_short_string, Self::match_long_string))(self) } /// Matches a short string. For example: `"foo"` - pub(crate) fn match_short_string(self) -> IonParseResult<'data, MatchedString> { + pub(crate) fn match_short_string(self) -> IonParseResult<'top, MatchedString> { delimited(char('"'), Self::match_short_string_body, char('"')) .map(|(_matched, contains_escaped_chars)| { if contains_escaped_chars { @@ -1508,13 +1612,13 @@ impl<'data> TextBufferView<'data> { /// Returns a matched buffer and a boolean indicating whether any escaped characters were /// found in the short string. - pub(crate) fn match_short_string_body(self) -> IonParseResult<'data, (Self, bool)> { + pub(crate) fn match_short_string_body(self) -> IonParseResult<'top, (Self, bool)> { Self::match_text_until_unescaped(self, b'\"', false) } /// Matches a long string comprised of any number of `'''`-enclosed segments interleaved /// with optional comments and whitespace. - pub(crate) fn match_long_string(self) -> IonParseResult<'data, MatchedString> { + pub(crate) fn match_long_string(self) -> IonParseResult<'top, MatchedString> { fold_many1( // Parser to keep applying repeatedly whitespace_and_then(Self::match_long_string_segment), @@ -1540,7 +1644,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a single long string segment enclosed by `'''` delimiters. - pub fn match_long_string_segment(self) -> IonParseResult<'data, (Self, bool)> { + pub fn match_long_string_segment(self) -> IonParseResult<'top, (Self, bool)> { delimited( complete_tag("'''"), Self::match_long_string_segment_body, @@ -1549,12 +1653,14 @@ impl<'data> TextBufferView<'data> { } /// Matches all input up to (but not including) the first unescaped instance of `'''`. - fn match_long_string_segment_body(self) -> IonParseResult<'data, (Self, bool)> { + fn match_long_string_segment_body(self) -> IonParseResult<'top, (Self, bool)> { Self::match_text_until_unescaped_str(self, "'''") } /// Matches an operator symbol, which can only legally appear within an s-expression - fn match_operator(self) -> IonParseResult<'data, MatchedRawTextValue<'data>> { + fn match_operator>( + self, + ) -> IonParseResult<'top, MatchedRawTextValue<'top, E>> { match_and_length(is_a("!#%&*+-./;<=>?@^`|~")) .map( |(text, length): (TextBufferView, usize)| MatchedRawTextValue { @@ -1570,7 +1676,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a symbol ID (`$28`), an identifier (`foo`), or a quoted symbol (`'foo'`). - fn match_symbol(self) -> IonParseResult<'data, MatchedSymbol> { + fn match_symbol(self) -> IonParseResult<'top, MatchedSymbol> { alt(( Self::match_symbol_id, Self::match_identifier, @@ -1579,7 +1685,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a symbol ID (`$28`). - fn match_symbol_id(self) -> IonParseResult<'data, MatchedSymbol> { + fn match_symbol_id(self) -> IonParseResult<'top, MatchedSymbol> { recognize(terminated( // Discard a `$` and parse an integer representing the symbol ID. // Note that symbol ID integers: @@ -1603,7 +1709,7 @@ impl<'data> TextBufferView<'data> { } /// Matches an identifier (`foo`). - fn match_identifier(self) -> IonParseResult<'data, MatchedSymbol> { + fn match_identifier(self) -> IonParseResult<'top, MatchedSymbol> { let (remaining, identifier_text) = recognize(terminated( pair( Self::identifier_initial_character, @@ -1631,7 +1737,7 @@ impl<'data> TextBufferView<'data> { Ok((remaining, MatchedSymbol::Identifier)) } - fn identifier_terminator(self) -> IonMatchResult<'data> { + fn identifier_terminator(self) -> IonMatchResult<'top> { alt(( eof, recognize(peek(not(Self::identifier_trailing_character))), @@ -1639,22 +1745,22 @@ impl<'data> TextBufferView<'data> { } /// Matches any character that can appear at the start of an identifier. - fn identifier_initial_character(self) -> IonParseResult<'data, Self> { + fn identifier_initial_character(self) -> IonParseResult<'top, Self> { recognize(alt((one_of("$_"), satisfy(|c| c.is_ascii_alphabetic()))))(self) } /// Matches any character that is legal in an identifier, though not necessarily at the beginning. - fn identifier_trailing_character(self) -> IonParseResult<'data, Self> { + fn identifier_trailing_character(self) -> IonParseResult<'top, Self> { recognize(alt((one_of("$_"), satisfy(|c| c.is_ascii_alphanumeric()))))(self) } /// Matches characters that are legal in an identifier, though not necessarily at the beginning. - fn identifier_trailing_characters(self) -> IonParseResult<'data, Self> { + fn identifier_trailing_characters(self) -> IonParseResult<'top, Self> { complete_take_while(|c: u8| c.is_ascii_alphanumeric() || b"$_".contains(&c))(self) } /// Matches a quoted symbol (`'foo'`). - fn match_quoted_symbol(self) -> IonParseResult<'data, MatchedSymbol> { + fn match_quoted_symbol(self) -> IonParseResult<'top, MatchedSymbol> { delimited(char('\''), Self::match_quoted_symbol_body, char('\'')) .map(|(_matched, contains_escaped_chars)| { if contains_escaped_chars { @@ -1668,7 +1774,7 @@ impl<'data> TextBufferView<'data> { /// Returns a matched buffer and a boolean indicating whether any escaped characters were /// found in the short string. - fn match_quoted_symbol_body(self) -> IonParseResult<'data, (Self, bool)> { + fn match_quoted_symbol_body(self) -> IonParseResult<'top, (Self, bool)> { Self::match_text_until_unescaped(self, b'\'', false) } @@ -1678,7 +1784,7 @@ impl<'data> TextBufferView<'data> { self, delimiter: u8, allow_unescaped_newlines: bool, - ) -> IonParseResult<'data, (Self, bool)> { + ) -> IonParseResult<'top, (Self, bool)> { let mut contains_escaped_chars = false; // This de-sugared syntax allows us to modify `iter` mid-loop. let mut iter = self.bytes().iter().copied().enumerate(); @@ -1725,7 +1831,7 @@ impl<'data> TextBufferView<'data> { byte: u8, index: usize, allow_unescaped_newlines: bool, - ) -> IonParseResult<'data, ()> { + ) -> IonParseResult<'top, ()> { if byte == b'\n' && !allow_unescaped_newlines { let error = InvalidInputError::new(self.slice_to_end(index)) .with_description("unescaped newlines are not allowed in short string literals"); @@ -1743,10 +1849,7 @@ impl<'data> TextBufferView<'data> { /// that is prefaced by the escape character `\`. /// /// The specified delimiter cannot be empty. - fn match_text_until_unescaped_str( - self, - delimiter: &str, - ) -> IonParseResult<'data, (Self, bool)> { + fn match_text_until_unescaped_str(self, delimiter: &str) -> IonParseResult<'top, (Self, bool)> { // The first byte in the delimiter let delimiter_head = delimiter.as_bytes()[0]; // Whether we've encountered any escapes while looking for the delimiter @@ -1777,12 +1880,12 @@ impl<'data> TextBufferView<'data> { } /// Matches a single base-10 digit, 0-9. - fn match_any_digit(self) -> IonParseResult<'data, char> { + fn match_any_digit(self) -> IonParseResult<'top, char> { satisfy(|c| c.is_ascii_digit())(self) } /// Matches a timestamp of any precision. - pub fn match_timestamp(self) -> IonParseResult<'data, MatchedTimestamp> { + pub fn match_timestamp(self) -> IonParseResult<'top, MatchedTimestamp> { alt(( Self::match_timestamp_y, Self::match_timestamp_ym, @@ -1794,7 +1897,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a timestamp with year precision. - fn match_timestamp_y(self) -> IonParseResult<'data, MatchedTimestamp> { + fn match_timestamp_y(self) -> IonParseResult<'top, MatchedTimestamp> { terminated( Self::match_timestamp_year, pair(complete_tag("T"), Self::peek_stop_character), @@ -1804,7 +1907,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a timestamp with month precision. - fn match_timestamp_ym(self) -> IonParseResult<'data, MatchedTimestamp> { + fn match_timestamp_ym(self) -> IonParseResult<'top, MatchedTimestamp> { terminated( pair(Self::match_timestamp_year, Self::match_timestamp_month), pair(complete_tag("T"), Self::peek_stop_character), @@ -1814,7 +1917,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a timestamp with day precision. - fn match_timestamp_ymd(self) -> IonParseResult<'data, MatchedTimestamp> { + fn match_timestamp_ymd(self) -> IonParseResult<'top, MatchedTimestamp> { terminated( tuple(( Self::match_timestamp_year, @@ -1828,7 +1931,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a timestamp with hour-and-minute precision. - fn match_timestamp_ymd_hm(self) -> IonParseResult<'data, MatchedTimestamp> { + fn match_timestamp_ymd_hm(self) -> IonParseResult<'top, MatchedTimestamp> { terminated( tuple(( Self::match_timestamp_year, @@ -1846,7 +1949,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a timestamp with second precision. - fn match_timestamp_ymd_hms(self) -> IonParseResult<'data, MatchedTimestamp> { + fn match_timestamp_ymd_hms(self) -> IonParseResult<'top, MatchedTimestamp> { terminated( tuple(( Self::match_timestamp_year, @@ -1865,7 +1968,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a timestamp with second precision, including a fractional seconds component. - fn match_timestamp_ymd_hms_fractional(self) -> IonParseResult<'data, MatchedTimestamp> { + fn match_timestamp_ymd_hms_fractional(self) -> IonParseResult<'top, MatchedTimestamp> { terminated( tuple(( Self::match_timestamp_year, @@ -1885,12 +1988,12 @@ impl<'data> TextBufferView<'data> { } /// Matches the year component of a timestamp. - fn match_timestamp_year(self) -> IonMatchResult<'data> { + fn match_timestamp_year(self) -> IonMatchResult<'top> { recognize(take_while_m_n(4, 4, |c: u8| c.is_ascii_digit()))(self) } /// Matches the month component of a timestamp, including a leading `-`. - fn match_timestamp_month(self) -> IonMatchResult<'data> { + fn match_timestamp_month(self) -> IonMatchResult<'top> { preceded( complete_tag("-"), recognize(alt(( @@ -1901,7 +2004,7 @@ impl<'data> TextBufferView<'data> { } /// Matches the day component of a timestamp, including a leading `-`. - fn match_timestamp_day(self) -> IonMatchResult<'data> { + fn match_timestamp_day(self) -> IonMatchResult<'top> { preceded( complete_tag("-"), recognize(alt(( @@ -1916,7 +2019,7 @@ impl<'data> TextBufferView<'data> { /// two-digit minute component. fn match_timestamp_hour_and_minute( self, - ) -> IonParseResult<'data, (TextBufferView<'data>, TextBufferView<'data>)> { + ) -> IonParseResult<'top, (TextBufferView<'top>, TextBufferView<'top>)> { preceded( tag("T"), separated_pair( @@ -1934,7 +2037,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a leading `:`, and any two-digit second component from `00` to `59` inclusive. - fn match_timestamp_seconds(self) -> IonMatchResult<'data> { + fn match_timestamp_seconds(self) -> IonMatchResult<'top> { preceded( complete_tag(":"), recognize(pair(complete_one_of("012345"), Self::match_any_digit)), @@ -1942,12 +2045,12 @@ impl<'data> TextBufferView<'data> { } /// Matches the fractional seconds component of a timestamp, including a leading `.`. - fn match_timestamp_fractional_seconds(self) -> IonMatchResult<'data> { + fn match_timestamp_fractional_seconds(self) -> IonMatchResult<'top> { preceded(complete_tag("."), digit1)(self) } /// Matches a timestamp offset of any format. - fn match_timestamp_offset(self) -> IonParseResult<'data, MatchedTimestampOffset> { + fn match_timestamp_offset(self) -> IonParseResult<'top, MatchedTimestampOffset> { alt(( value(MatchedTimestampOffset::Zulu, complete_tag("Z")), value(MatchedTimestampOffset::Zulu, complete_tag("+00:00")), @@ -1971,7 +2074,7 @@ impl<'data> TextBufferView<'data> { /// Matches a timestamp offset encoded as a two-digit hour, a delimiting `:`, and a two-digit /// minute. - fn match_timestamp_offset_hours_and_minutes(self) -> IonParseResult<'data, (Self, Self)> { + fn match_timestamp_offset_hours_and_minutes(self) -> IonParseResult<'top, (Self, Self)> { separated_pair( // Hour recognize(alt(( @@ -1986,7 +2089,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a complete blob, including the opening `{{` and closing `}}`. - pub fn match_blob(self) -> IonParseResult<'data, MatchedBlob> { + pub fn match_blob(self) -> IonParseResult<'top, MatchedBlob> { delimited( tag("{{"), // Only whitespace (not comments) can appear within the blob @@ -2000,7 +2103,7 @@ impl<'data> TextBufferView<'data> { } /// Matches a clob of either short- or long-form syntax. - pub fn match_clob(self) -> IonParseResult<'data, MatchedClob> { + pub fn match_clob(self) -> IonParseResult<'top, MatchedClob> { delimited( tag("{{"), preceded( @@ -2015,14 +2118,14 @@ impl<'data> TextBufferView<'data> { } /// Matches the body (inside the `{{` and `}}`) of a short-form clob. - fn match_short_clob_body(self) -> IonMatchResult<'data> { + fn match_short_clob_body(self) -> IonMatchResult<'top> { let (remaining, (body, _matched_string)) = consumed(Self::match_short_string)(self)?; body.validate_clob_text()?; Ok((remaining, body)) } /// Matches the body (inside the `{{` and `}}`) of a long-form clob. - fn match_long_clob_body(self) -> IonMatchResult<'data> { + fn match_long_clob_body(self) -> IonMatchResult<'top> { recognize(many1_count(preceded( Self::match_optional_whitespace, Self::match_long_clob_body_segment, @@ -2030,14 +2133,14 @@ impl<'data> TextBufferView<'data> { } /// Matches a single segment of a long-form clob's content. - fn match_long_clob_body_segment(self) -> IonMatchResult<'data> { + fn match_long_clob_body_segment(self) -> IonMatchResult<'top> { let (remaining, (body, _matched_string)) = consumed(Self::match_long_string_segment)(self)?; body.validate_clob_text()?; Ok((remaining, body)) } /// Returns an error if the buffer contains any byte that is not legal inside a clob. - fn validate_clob_text(self) -> IonMatchResult<'data> { + fn validate_clob_text(self) -> IonMatchResult<'top> { for byte in self.bytes().iter().copied() { if !Self::byte_is_legal_clob_ascii(byte) { let message = format!("found an illegal byte '{:0x}' in clob", byte); @@ -2062,7 +2165,7 @@ impl<'data> TextBufferView<'data> { /// Matches the base64 content within a blob. Ion allows the base64 content to be broken up with /// whitespace, so the matched input region may need to be stripped of whitespace before /// the data can be decoded. - fn match_base64_content(self) -> IonMatchResult<'data> { + fn match_base64_content(self) -> IonMatchResult<'top> { recognize(terminated( many0_count(preceded( Self::match_optional_whitespace, @@ -2093,8 +2196,9 @@ impl<'data> nom::InputTake for TextBufferView<'data> { fn take_split(&self, count: usize) -> (Self, Self) { let (before, after) = self.data.split_at(count); - let buffer_before = TextBufferView::new_with_offset(before, self.offset()); - let buffer_after = TextBufferView::new_with_offset(after, self.offset() + count); + let buffer_before = TextBufferView::new_with_offset(self.allocator, before, self.offset()); + let buffer_after = + TextBufferView::new_with_offset(self.allocator, after, self.offset() + count); // Nom's convention is to place the remaining portion of the buffer first, which leads to // a potentially surprising reversed tuple order. (buffer_after, buffer_before) @@ -2298,11 +2402,14 @@ where #[cfg(test)] mod tests { + use rstest::rstest; + use super::*; /// Stores an input string that can be tested against a given parser. struct MatchTest { input: String, + allocator: BumpAllocator, } impl MatchTest { @@ -2311,6 +2418,7 @@ mod tests { fn new(input: &str) -> Self { MatchTest { input: input.to_string(), + allocator: BumpAllocator::new(), } } @@ -2318,7 +2426,7 @@ mod tests { where P: Parser, O, IonParseError<'data>>, { - let buffer = TextBufferView::new(self.input.as_bytes()); + let buffer = TextBufferView::new(&self.allocator, self.input.as_bytes()); match_length(parser).parse(buffer) } @@ -2841,7 +2949,6 @@ mod tests { } } - use rstest::rstest; #[rstest] #[case::simple_e_exp("(:foo)")] #[case::e_exp_in_e_exp("(:foo (:bar 1))")] @@ -2965,7 +3072,8 @@ mod tests { } fn test_match_text_until_unescaped_str() { - let input = TextBufferView::new(r" foo bar \''' baz''' quux ".as_bytes()); + let allocator = BumpAllocator::new(); + let input = TextBufferView::new(&allocator, r" foo bar \''' baz''' quux ".as_bytes()); let (_remaining, (matched, contains_escapes)) = input.match_text_until_unescaped_str(r#"'''"#).unwrap(); assert_eq!(matched.as_text().unwrap(), " foo bar \\''' baz"); diff --git a/src/lazy/text/encoded_value.rs b/src/lazy/text/encoded_value.rs index a57770b6..b5daa465 100644 --- a/src/lazy/text/encoded_value.rs +++ b/src/lazy/text/encoded_value.rs @@ -1,5 +1,6 @@ +use crate::lazy::encoding::TextEncoding; use crate::lazy::text::buffer::TextBufferView; -use crate::lazy::text::matched::{MatchedFieldName, MatchedValue}; +use crate::lazy::text::matched::{MatchedFieldNameSyntax, MatchedValue}; use crate::result::IonFailure; use crate::{IonResult, IonType, RawSymbolTokenRef}; use std::ops::Range; @@ -11,7 +12,7 @@ use std::ops::Range; /// allowing a user to re-read (that is: parse) the body of the value as many times as necessary /// without re-parsing its header information each time. #[derive(Copy, Clone, Debug, PartialEq)] -pub(crate) struct EncodedTextValue { +pub(crate) struct EncodedTextValue<'top, E: TextEncoding<'top>> { // Each encoded text value has up to three components, appearing in the following order: // // [ field_name? | annotations? | data ] @@ -61,20 +62,20 @@ pub(crate) struct EncodedTextValue { // For some types (e.g. bool), matching the text is the complete parsing process so the whole // value is stored. For others (e.g. a timestamp), the various components of the value are // recognized during matching and partial information like subfield offsets can be stored here. - matched_value: MatchedValue, + matched_value: MatchedValue<'top, E>, // If this value is a struct field value, this will be populated with an enum indicating // the syntax of the associated field name. If the field name is later read, the decoder // can avoid re-parsing the input from scratch. - field_name_syntax: Option, + field_name_syntax: Option, } -impl EncodedTextValue { +impl<'top, E: TextEncoding<'top>> EncodedTextValue<'top, E> { pub(crate) fn new( - matched_value: MatchedValue, + matched_value: MatchedValue<'top, E>, offset: usize, length: usize, - ) -> EncodedTextValue { + ) -> EncodedTextValue<'top, E> { EncodedTextValue { data_offset: offset, data_length: length, @@ -96,10 +97,10 @@ impl EncodedTextValue { // $10 pub(crate) fn with_field_name( mut self, - field_name_syntax: MatchedFieldName, + field_name_syntax: MatchedFieldNameSyntax, offset: usize, length: usize, - ) -> EncodedTextValue { + ) -> EncodedTextValue<'top, E> { self.field_name_syntax = Some(field_name_syntax); self.field_name_offset = (self.data_offset - offset) as u32; self.field_name_length = length as u32; @@ -117,7 +118,7 @@ impl EncodedTextValue { mut self, offset: usize, length: usize, - ) -> EncodedTextValue { + ) -> EncodedTextValue<'top, E> { self.annotations_offset = (self.data_offset - offset) as u32; self.annotations_length = length as u32; self @@ -135,9 +136,9 @@ impl EncodedTextValue { MatchedValue::Symbol(_) => IonType::Symbol, MatchedValue::Blob(_) => IonType::Blob, MatchedValue::Clob(_) => IonType::Clob, - MatchedValue::List => IonType::List, - MatchedValue::SExp => IonType::SExp, - MatchedValue::Struct => IonType::Struct, + MatchedValue::List(_) => IonType::List, + MatchedValue::SExp(_) => IonType::SExp, + MatchedValue::Struct(_) => IonType::Struct, } } @@ -206,11 +207,11 @@ impl EncodedTextValue { self.data_length + u32::max(self.annotations_offset, self.field_name_offset) as usize } - pub fn field_name_syntax(&self) -> Option { + pub fn field_name_syntax(&self) -> Option { self.field_name_syntax } - pub fn matched(&self) -> MatchedValue { + pub fn matched(&self) -> MatchedValue<'top, E> { self.matched_value } } @@ -218,39 +219,57 @@ impl EncodedTextValue { #[cfg(test)] mod tests { use super::*; + use crate::lazy::encoding::TextEncoding_1_0; use crate::lazy::text::matched::MatchedSymbol; #[test] fn total_length_data_only() { - let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12); + let value = + EncodedTextValue::::new(MatchedValue::Null(IonType::Null), 100, 12); assert_eq!(value.total_length(), 12); } #[test] fn total_length_data_with_field_name() { - let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12) - .with_field_name(MatchedFieldName::Symbol(MatchedSymbol::Identifier), 90, 4); + let value = + EncodedTextValue::::new(MatchedValue::Null(IonType::Null), 100, 12) + .with_field_name( + MatchedFieldNameSyntax::Symbol(MatchedSymbol::Identifier), + 90, + 4, + ); assert_eq!(value.total_length(), 22); } #[test] fn total_length_data_with_annotations() { - let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12) - .with_annotations_sequence(90, 4); + let value = + EncodedTextValue::::new(MatchedValue::Null(IonType::Null), 100, 12) + .with_annotations_sequence(90, 4); assert_eq!(value.total_length(), 22); } #[test] fn total_length_data_with_field_name_and_annotations() { - let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12) - .with_field_name(MatchedFieldName::Symbol(MatchedSymbol::Identifier), 90, 4) - .with_annotations_sequence(94, 6); + let value = + EncodedTextValue::::new(MatchedValue::Null(IonType::Null), 100, 12) + .with_field_name( + MatchedFieldNameSyntax::Symbol(MatchedSymbol::Identifier), + 90, + 4, + ) + .with_annotations_sequence(94, 6); assert_eq!(value.total_length(), 22); // Same test but with extra whitespace between the components - let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12) - .with_field_name(MatchedFieldName::Symbol(MatchedSymbol::Identifier), 80, 4) - .with_annotations_sequence(91, 6); + let value = + EncodedTextValue::::new(MatchedValue::Null(IonType::Null), 100, 12) + .with_field_name( + MatchedFieldNameSyntax::Symbol(MatchedSymbol::Identifier), + 80, + 4, + ) + .with_annotations_sequence(91, 6); assert_eq!(value.total_length(), 32, "{:?}", value); } } diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index afa23cf6..24126cae 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -21,6 +21,7 @@ use std::borrow::Cow; use std::num::IntErrorKind; +use std::ops::Range; use std::str::FromStr; use nom::branch::alt; @@ -34,6 +35,7 @@ use smallvec::SmallVec; use crate::decimal::coefficient::{Coefficient, Sign}; use crate::lazy::bytes_ref::BytesRef; +use crate::lazy::decoder::{LazyDecoder, LazyRawFieldExpr, LazyRawValueExpr}; use crate::lazy::str_ref::StrRef; use crate::lazy::text::as_utf8::AsUtf8; use crate::lazy::text::buffer::TextBufferView; @@ -45,8 +47,8 @@ use crate::{ }; /// A partially parsed Ion value. -#[derive(Clone, Copy, Debug, PartialEq)] -pub(crate) enum MatchedValue { +#[derive(Clone, Copy, Debug)] +pub(crate) enum MatchedValue<'top, D: LazyDecoder> { // `Null` and `Bool` are fully parsed because they only involve matching a keyword. Null(IonType), Bool(bool), @@ -58,31 +60,84 @@ pub(crate) enum MatchedValue { Symbol(MatchedSymbol), Blob(MatchedBlob), Clob(MatchedClob), - List, - SExp, - Struct, + List(&'top [LazyRawValueExpr<'top, D>]), + SExp(&'top [LazyRawValueExpr<'top, D>]), + Struct(&'top [LazyRawFieldExpr<'top, D>]), +} + +impl<'top, D: LazyDecoder> PartialEq for MatchedValue<'top, D> { + fn eq(&self, other: &Self) -> bool { + use MatchedValue::*; + match (self, other) { + (Null(n1), Null(n2)) => n1 == n2, + (Bool(b1), Bool(b2)) => b1 == b2, + (Int(i1), Int(i2)) => i1 == i2, + (Float(f1), Float(f2)) => f1 == f2, + (Decimal(d1), Decimal(d2)) => d1 == d2, + (Timestamp(t1), Timestamp(t2)) => t1 == t2, + (String(s1), String(s2)) => s1 == s2, + (Symbol(s1), Symbol(s2)) => s1 == s2, + (Blob(b1), Blob(b2)) => b1 == b2, + (Clob(c1), Clob(c2)) => c1 == c2, + // The container variants hold raw representations of the containers themselves. + // We cannot compare their equality without recursively reading those containers, + // which introduces many opportunities to encounter an error that this method cannot + // surface. Because this is `PartialEq`, we have the option of returning `false` for + // values that cannot be compared to one another. + _ => false, + } + } } #[derive(Copy, Clone, Debug, PartialEq)] -pub(crate) enum MatchedFieldName { +pub(crate) enum MatchedFieldNameSyntax { Symbol(MatchedSymbol), String(MatchedString), } -impl MatchedFieldName { +impl MatchedFieldNameSyntax { pub fn read<'data>( &self, matched_input: TextBufferView<'data>, ) -> IonResult> { match self { - MatchedFieldName::Symbol(matched_symbol) => matched_symbol.read(matched_input), - MatchedFieldName::String(matched_string) => { + MatchedFieldNameSyntax::Symbol(matched_symbol) => matched_symbol.read(matched_input), + MatchedFieldNameSyntax::String(matched_string) => { matched_string.read(matched_input).map(|s| s.into()) } } } } +#[derive(Copy, Clone, Debug, PartialEq)] +pub(crate) struct MatchedFieldName { + // This is stored as a tuple to allow this type to be `Copy`; Range is not `Copy`. + span: (usize, usize), + syntax: MatchedFieldNameSyntax, +} + +impl MatchedFieldName { + pub fn new(syntax: MatchedFieldNameSyntax, span: Range) -> Self { + Self { + span: (span.start, span.end), + syntax, + } + } + pub fn span(&self) -> Range { + self.span.0..self.span.1 + } + pub fn syntax(&self) -> MatchedFieldNameSyntax { + self.syntax + } + + pub fn read<'data>( + &self, + matched_input: TextBufferView<'data>, + ) -> IonResult> { + self.syntax.read(matched_input) + } +} + /// A partially parsed Ion int. #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct MatchedInt { @@ -1145,6 +1200,7 @@ impl MatchedClob { mod tests { use std::str::FromStr; + use bumpalo::Bump as BumpAllocator; use num_bigint::BigInt; use crate::lazy::bytes_ref::BytesRef; @@ -1155,7 +1211,8 @@ mod tests { fn read_ints() -> IonResult<()> { fn expect_int(data: &str, expected: impl Into) { let expected: Int = expected.into(); - let buffer = TextBufferView::new(data.as_bytes()); + let allocator = BumpAllocator::new(); + let buffer = TextBufferView::new(&allocator, data.as_bytes()); let (_remaining, matched) = buffer.match_int().unwrap(); let actual = matched.read(buffer).unwrap(); assert_eq!( @@ -1190,7 +1247,8 @@ mod tests { fn read_timestamps() -> IonResult<()> { fn expect_timestamp(data: &str, expected: Timestamp) { let data = format!("{data} "); // Append a space - let buffer = TextBufferView::new(data.as_bytes()); + let allocator = BumpAllocator::new(); + let buffer = TextBufferView::new(&allocator, data.as_bytes()); let (_remaining, matched) = buffer.match_timestamp().unwrap(); let actual = matched.read(buffer).unwrap(); assert_eq!( @@ -1291,7 +1349,8 @@ mod tests { #[test] fn read_decimals() -> IonResult<()> { fn expect_decimal(data: &str, expected: Decimal) { - let buffer = TextBufferView::new(data.as_bytes()); + let allocator = BumpAllocator::new(); + let buffer = TextBufferView::new(&allocator, data.as_bytes()); let result = buffer.match_decimal(); assert!( result.is_ok(), @@ -1345,7 +1404,8 @@ mod tests { fn read_blobs() -> IonResult<()> { fn expect_blob(data: &str, expected: &str) { let data = format!("{data} "); // Append a space - let buffer = TextBufferView::new(data.as_bytes()); + let allocator = BumpAllocator::new(); + let buffer = TextBufferView::new(&allocator, data.as_bytes()); let (_remaining, matched) = buffer.match_blob().unwrap(); let actual = matched.read(buffer).unwrap(); assert_eq!( @@ -1382,7 +1442,8 @@ mod tests { // stream so the parser knows that the long-form strings are complete. We then trim // our fabricated value off of the input before reading. let data = format!("{data}\n0"); - let buffer = TextBufferView::new(data.as_bytes()); + let allocator = BumpAllocator::new(); + let buffer = TextBufferView::new(&allocator, data.as_bytes()); let (_remaining, matched) = buffer.match_string().unwrap(); let matched_input = buffer.slice(0, buffer.len() - 2); let actual = matched.read(matched_input).unwrap(); @@ -1417,8 +1478,8 @@ mod tests { #[test] fn read_clobs() -> IonResult<()> { - fn read_clob(data: &str) -> IonResult { - let buffer = TextBufferView::new(data.as_bytes()); + fn read_clob<'a>(allocator: &'a BumpAllocator, data: &'a str) -> IonResult> { + let buffer = TextBufferView::new(allocator, data.as_bytes()); // All `read_clob` usages should be accepted by the matcher, so we can `unwrap()` the // call to `match_clob()`. let (_remaining, matched) = buffer.match_clob().unwrap(); @@ -1426,16 +1487,16 @@ mod tests { matched.read(buffer) } - fn expect_clob_error(data: &str) { - let actual = read_clob(data); + fn expect_clob_error(allocator: &BumpAllocator, data: &str) { + let actual = read_clob(allocator, data); assert!( actual.is_err(), "Successfully read a clob from illegal input." ); } - fn expect_clob(data: &str, expected: &str) { - let result = read_clob(data); + fn expect_clob(allocator: &BumpAllocator, data: &str, expected: &str) { + let result = read_clob(allocator, data); assert!( result.is_ok(), "Unexpected read failure for input '{data}': {:?}", @@ -1482,8 +1543,10 @@ mod tests { ("{{\"foo\rbar\rbaz\"}}", "foo\rbar\rbaz"), ]; + let mut allocator = BumpAllocator::new(); for (input, expected) in tests { - expect_clob(input, expected); + expect_clob(&allocator, input, expected); + allocator.reset(); } let illegal_inputs = [ @@ -1504,7 +1567,8 @@ mod tests { ]; for input in illegal_inputs { - expect_clob_error(input); + expect_clob_error(&allocator, input); + allocator.reset(); } Ok(()) diff --git a/src/lazy/text/parse_result.rs b/src/lazy/text/parse_result.rs index 110b329f..00d93188 100644 --- a/src/lazy/text/parse_result.rs +++ b/src/lazy/text/parse_result.rs @@ -217,19 +217,24 @@ impl<'data, T> ToIteratorOutput<'data, T> for IonResult<(TextBufferView<'data>, /// or `nom::Err`) into a general-purpose `IonResult`. If the implementing type /// does not have its own `label` and `input`, the specified values will be used. pub(crate) trait AddContext<'data, T> { - fn with_context( + fn with_context<'a>( self, label: impl Into>, input: TextBufferView<'data>, - ) -> IonResult<(TextBufferView<'data>, T)>; + ) -> IonResult<(TextBufferView<'a>, T)> + where + 'data: 'a; } impl<'data, T> AddContext<'data, T> for nom::Err> { - fn with_context( + fn with_context<'a>( self, label: impl Into>, input: TextBufferView<'data>, - ) -> IonResult<(TextBufferView<'data>, T)> { + ) -> IonResult<(TextBufferView<'a>, T)> + where + 'data: 'a, + { let ipe = IonParseError::from(self); ipe.with_context(label, input) } @@ -237,11 +242,14 @@ impl<'data, T> AddContext<'data, T> for nom::Err> { // Turns an IonParseError into an IonResult impl<'data, T> AddContext<'data, T> for IonParseError<'data> { - fn with_context( + fn with_context<'a>( self, label: impl Into>, input: TextBufferView<'data>, - ) -> IonResult<(TextBufferView<'data>, T)> { + ) -> IonResult<(TextBufferView<'a>, T)> + where + 'data: 'a, + { match self { IonParseError::Incomplete => IonResult::incomplete(label, input.offset()), IonParseError::Invalid(invalid_input_error) => Err(IonError::from(invalid_input_error)), @@ -250,11 +258,14 @@ impl<'data, T> AddContext<'data, T> for IonParseError<'data> { } impl<'data, T> AddContext<'data, T> for IonParseResult<'data, T> { - fn with_context( + fn with_context<'a>( self, label: impl Into>, input: TextBufferView<'data>, - ) -> IonResult<(TextBufferView<'data>, T)> { + ) -> IonResult<(TextBufferView<'a>, T)> + where + 'data: 'a, + { match self { // No change needed in the ok case Ok(matched) => Ok(matched), diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 34412239..699dba55 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -1,20 +1,20 @@ #![allow(non_camel_case_types)] use crate::lazy::decoder::LazyRawReader; use crate::lazy::encoding::TextEncoding_1_0; -use crate::lazy::raw_stream_item::RawStreamItem; +use crate::lazy::never::Never; +use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::AddContext; +use crate::lazy::text::value::LazyRawTextValue_1_0; use crate::result::IonFailure; use crate::IonResult; +use bumpalo::Bump as BumpAllocator; -/// A text Ion 1.0 reader that yields [`RawStreamItem`]s representing the top level values found +/// A text Ion 1.0 reader that yields [`LazyRawStreamItem`]s representing the top level values found /// in the provided input stream. pub struct LazyRawTextReader_1_0<'data> { - // The current view of the data we're reading from. - buffer: TextBufferView<'data>, - // Each time something is parsed from the buffer successfully, the caller will mark the number - // of bytes that may be skipped the next time the reader advances. - bytes_to_skip: usize, + input: &'data [u8], + offset: usize, } impl<'data> LazyRawTextReader_1_0<'data> { @@ -29,22 +29,27 @@ impl<'data> LazyRawTextReader_1_0<'data> { /// position of values encountered in `data`. fn new_with_offset(data: &'data [u8], offset: usize) -> LazyRawTextReader_1_0<'data> { LazyRawTextReader_1_0 { - buffer: TextBufferView::new_with_offset(data, offset), - bytes_to_skip: 0, + input: data, + offset, } } - pub fn next<'top>(&'top mut self) -> IonResult> + pub fn next<'top>( + &'top mut self, + allocator: &'top BumpAllocator, + ) -> IonResult, Never>> where 'data: 'top, { - let (buffer_after_whitespace, _whitespace) = self - .buffer + let input = + TextBufferView::new_with_offset(allocator, &self.input[self.offset..], self.offset); + let (buffer_after_whitespace, _whitespace) = input .match_optional_comments_and_whitespace() - .with_context("reading whitespace/comments at the top level", self.buffer)?; + .with_context("reading whitespace/comments at the top level", input)?; if buffer_after_whitespace.is_empty() { return Ok(RawStreamItem::EndOfStream); } + let buffer_after_whitespace = buffer_after_whitespace.local_lifespan(); let (remaining, matched_item) = buffer_after_whitespace .match_top_level_item_1_0() @@ -63,7 +68,7 @@ impl<'data> LazyRawTextReader_1_0<'data> { } // Since we successfully matched the next value, we'll update the buffer // so a future call to `next()` will resume parsing the remaining input. - self.buffer = remaining; + self.offset = remaining.offset(); Ok(matched_item) } } @@ -73,8 +78,14 @@ impl<'data> LazyRawReader<'data, TextEncoding_1_0> for LazyRawTextReader_1_0<'da LazyRawTextReader_1_0::new(data) } - fn next<'a>(&'a mut self) -> IonResult> { - self.next() + fn next<'top>( + &'top mut self, + allocator: &'top BumpAllocator, + ) -> IonResult> + where + 'data: 'top, + { + self.next(allocator) } } @@ -87,6 +98,34 @@ mod tests { use super::*; + struct TestReader<'data> { + allocator: BumpAllocator, + reader: LazyRawTextReader_1_0<'data>, + } + + impl<'data> TestReader<'data> { + fn next(&mut self) -> IonResult> { + self.reader.next(&self.allocator) + } + fn expect_next<'a>(&'a mut self, expected: RawValueRef<'a, TextEncoding_1_0>) + where + 'data: 'a, + { + let TestReader { allocator, reader } = self; + let lazy_value = reader + .next(allocator) + .expect("advancing the reader failed") + .expect_value() + .expect("expected a value"); + assert_eq!( + matches!(expected, RawValueRef::Null(_)), + lazy_value.is_null() + ); + let value_ref = lazy_value.read().expect("reading failed"); + assert_eq!(value_ref, expected, "{:?} != {:?}", value_ref, expected); + } + } + #[test] fn test_top_level() -> IonResult<()> { let mut data = String::new(); @@ -224,50 +263,36 @@ mod tests { "#, ); - fn expect_next<'data>( - reader: &mut LazyRawTextReader_1_0<'data>, - expected: RawValueRef<'data, TextEncoding_1_0>, - ) { - let lazy_value = reader - .next() - .expect("advancing the reader failed") - .expect_value() - .expect("expected a value"); - assert_eq!( - matches!(expected, RawValueRef::Null(_)), - lazy_value.is_null() - ); - let value_ref = lazy_value.read().expect("reading failed"); - assert_eq!(value_ref, expected, "{:?} != {:?}", value_ref, expected); - } - - let reader = &mut LazyRawTextReader_1_0::new(data.as_bytes()); + let reader = &mut TestReader { + reader: LazyRawTextReader_1_0::new(data.as_bytes()), + allocator: BumpAllocator::new(), + }; assert_eq!(reader.next()?.expect_ivm()?, (1, 0)); // null - expect_next(reader, RawValueRef::Null(IonType::Null)); + reader.expect_next(RawValueRef::Null(IonType::Null)); // null.bool - expect_next(reader, RawValueRef::Null(IonType::Bool)); + reader.expect_next(RawValueRef::Null(IonType::Bool)); // null.int - expect_next(reader, RawValueRef::Null(IonType::Int)); + reader.expect_next(RawValueRef::Null(IonType::Int)); // false - expect_next(reader, RawValueRef::Bool(false)); + reader.expect_next(RawValueRef::Bool(false)); // true - expect_next(reader, RawValueRef::Bool(true)); + reader.expect_next(RawValueRef::Bool(true)); // 500 - expect_next(reader, RawValueRef::Int(500.into())); + reader.expect_next(RawValueRef::Int(500.into())); // 0x20 - expect_next(reader, RawValueRef::Int(0x20.into())); + reader.expect_next(RawValueRef::Int(0x20.into())); // 0b0101 - expect_next(reader, RawValueRef::Int(0b0101.into())); + reader.expect_next(RawValueRef::Int(0b0101.into())); // +inf - expect_next(reader, RawValueRef::Float(f64::INFINITY)); + reader.expect_next(RawValueRef::Float(f64::INFINITY)); // -inf - expect_next(reader, RawValueRef::Float(f64::NEG_INFINITY)); + reader.expect_next(RawValueRef::Float(f64::NEG_INFINITY)); // nan // NaN != NaN, so we have to spell this test out a bit more assert!(reader @@ -277,117 +302,92 @@ mod tests { .expect_float()? .is_nan()); // 3.6e0 - expect_next(reader, RawValueRef::Float(3.6f64)); + reader.expect_next(RawValueRef::Float(3.6f64)); // 2.25e23 - expect_next(reader, RawValueRef::Float(2.5f64 * 10f64.powi(8))); + reader.expect_next(RawValueRef::Float(2.5f64 * 10f64.powi(8))); // -3.18 - expect_next(reader, RawValueRef::Float(-3.18f64)); + reader.expect_next(RawValueRef::Float(-3.18f64)); // 1.5 - expect_next(reader, RawValueRef::Decimal(Decimal::new(15, -1))); + reader.expect_next(RawValueRef::Decimal(Decimal::new(15, -1))); // 3.14159 - expect_next(reader, RawValueRef::Decimal(Decimal::new(314159, -5))); + reader.expect_next(RawValueRef::Decimal(Decimal::new(314159, -5))); // -6d+5 - expect_next(reader, RawValueRef::Decimal(Decimal::new(-6, 5))); + reader.expect_next(RawValueRef::Decimal(Decimal::new(-6, 5))); // 6d-5 - expect_next(reader, RawValueRef::Decimal(Decimal::new(6, -5))); + reader.expect_next(RawValueRef::Decimal(Decimal::new(6, -5))); // 2023T - expect_next( - reader, - RawValueRef::Timestamp(Timestamp::with_year(2023).build()?), - ); + reader.expect_next(RawValueRef::Timestamp(Timestamp::with_year(2023).build()?)); // 2023-08-13T - expect_next( - reader, - RawValueRef::Timestamp(Timestamp::with_ymd(2023, 8, 13).build()?), - ); + reader.expect_next(RawValueRef::Timestamp( + Timestamp::with_ymd(2023, 8, 13).build()?, + )); // 2023-08-13T21:45:30.993-05:00 - expect_next( - reader, - RawValueRef::Timestamp( - Timestamp::with_ymd(2023, 8, 13) - .with_hms(21, 45, 30) - .with_milliseconds(993) - .with_offset(-300) - .build()?, - ), - ); + reader.expect_next(RawValueRef::Timestamp( + Timestamp::with_ymd(2023, 8, 13) + .with_hms(21, 45, 30) + .with_milliseconds(993) + .with_offset(-300) + .build()?, + )); // '''Long string without escapes''' - expect_next( - reader, - RawValueRef::String("Long string without escapes".into()), - ); + reader.expect_next(RawValueRef::String("Long string without escapes".into())); // "Hello" - expect_next(reader, RawValueRef::String("Hello!".into())); + reader.expect_next(RawValueRef::String("Hello!".into())); // '''Long string with escaped \''' delimiter''' - expect_next( - reader, - RawValueRef::String("Long string with escaped ''' delimiter".into()), - ); + reader.expect_next(RawValueRef::String( + "Long string with escaped ''' delimiter".into(), + )); // "foo bar baz" - expect_next(reader, RawValueRef::String("foo bar baz".into())); + reader.expect_next(RawValueRef::String("foo bar baz".into())); // "😎😎😎" - expect_next(reader, RawValueRef::String("😎😎😎".into())); + reader.expect_next(RawValueRef::String("😎😎😎".into())); // "lol\n\r\0wat" - expect_next(reader, RawValueRef::String("lol\n\r\0wat".into())); + reader.expect_next(RawValueRef::String("lol\n\r\0wat".into())); // "\x48ello, \x77orld!" - expect_next(reader, RawValueRef::String("Hello, world!".into())); + reader.expect_next(RawValueRef::String("Hello, world!".into())); // "\u0048ello, \u0077orld!" - expect_next(reader, RawValueRef::String("Hello, world!".into())); + reader.expect_next(RawValueRef::String("Hello, world!".into())); // "\U00000048ello, \U00000077orld!" - expect_next(reader, RawValueRef::String("Hello, world!".into())); - expect_next( - reader, - RawValueRef::String("Mercury Venus Earth Mars ".into()), - ); + reader.expect_next(RawValueRef::String("Hello, world!".into())); + reader.expect_next(RawValueRef::String("Mercury Venus Earth Mars ".into())); // "\"Hello,\\\n world!\" " - expect_next(reader, RawValueRef::String("Hello, world!".into())); + reader.expect_next(RawValueRef::String("Hello, world!".into())); // 'foo' - expect_next( - reader, - RawValueRef::Symbol(RawSymbolTokenRef::Text("foo".into())), - ); - expect_next( - reader, - RawValueRef::Symbol(RawSymbolTokenRef::Text("Hello, world!".into())), - ); - expect_next( - reader, - RawValueRef::Symbol(RawSymbolTokenRef::Text("😎😎😎".into())), - ); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text("foo".into()))); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text( + "Hello, world!".into(), + ))); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text( + "😎😎😎".into(), + ))); // firstName - expect_next( - reader, - RawValueRef::Symbol(RawSymbolTokenRef::Text("firstName".into())), - ); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text( + "firstName".into(), + ))); // date_of_birth - expect_next( - reader, - RawValueRef::Symbol(RawSymbolTokenRef::Text("date_of_birth".into())), - ); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text( + "date_of_birth".into(), + ))); // $variable - expect_next( - reader, - RawValueRef::Symbol(RawSymbolTokenRef::Text("$variable".into())), - ); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text( + "$variable".into(), + ))); // $0 - expect_next(reader, RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(0))); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(0))); // $10 - expect_next(reader, RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(10))); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(10))); // $733 - expect_next( - reader, - RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(733)), - ); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(733))); // {{cmF6emxlIGRhenpsZSByb290IGJlZXI=}} - expect_next(reader, RawValueRef::Blob("razzle dazzle root beer".into())); + reader.expect_next(RawValueRef::Blob("razzle dazzle root beer".into())); // {{"foobarbaz"}} - expect_next(reader, RawValueRef::Clob("foobarbaz".into())); + reader.expect_next(RawValueRef::Clob("foobarbaz".into())); // {{'''foo''' '''bar''' '''baz'''}} - expect_next(reader, RawValueRef::Clob("foobarbaz".into())); + reader.expect_next(RawValueRef::Clob("foobarbaz".into())); // [1, 2, 3] let list = reader.next()?.expect_value()?.read()?.expect_list()?; diff --git a/src/lazy/text/raw/sequence.rs b/src/lazy/text/raw/sequence.rs index 8fbd28cd..38ce5013 100644 --- a/src/lazy/text/raw/sequence.rs +++ b/src/lazy/text/raw/sequence.rs @@ -165,8 +165,8 @@ impl<'data> Iterator for RawTextListIterator_1_0<'data> { // ===== S-Expressions ===== #[derive(Copy, Clone)] -pub struct LazyRawTextSExp_1_0<'data> { - pub(crate) value: LazyRawTextValue_1_0<'data>, +pub struct LazyRawTextSExp_1_0<'top> { + pub(crate) value: LazyRawTextValue_1_0<'top>, } impl<'data> LazyRawTextSExp_1_0<'data> { @@ -185,22 +185,20 @@ impl<'data> LazyRawTextSExp_1_0<'data> { } #[derive(Copy, Clone, Debug)] -pub struct RawTextSExpIterator_1_0<'data> { - input: TextBufferView<'data>, +pub struct RawTextSExpIterator_1_0<'top> { + input: TextBufferView<'top>, // If this iterator has returned an error, it should return `None` forever afterwards has_returned_error: bool, } -impl<'data> RawTextSExpIterator_1_0<'data> { - pub(crate) fn new(input: TextBufferView<'data>) -> RawTextSExpIterator_1_0<'data> { +impl<'top> RawTextSExpIterator_1_0<'top> { + pub(crate) fn new(input: TextBufferView<'top>) -> RawTextSExpIterator_1_0<'top> { RawTextSExpIterator_1_0 { input, has_returned_error: false, } } -} -impl<'data> RawTextSExpIterator_1_0<'data> { /// Scans ahead to find the end of this s-expression and reports the input span that it occupies. /// /// The `initial_bytes_skipped` parameter indicates how many bytes of input that represented the @@ -311,9 +309,12 @@ mod tests { use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::IonResult; + use bumpalo::Bump as BumpAllocator; + fn expect_sequence_range(ion_data: &str, expected: Range) -> IonResult<()> { + let allocator = BumpAllocator::new(); let reader = &mut LazyRawTextReader_1_0::new(ion_data.as_bytes()); - let value = reader.next()?.expect_value()?; + let value = reader.next(&allocator)?.expect_value()?; let actual_range = value.matched.encoded_value.data_range(); assert_eq!( actual_range, expected, diff --git a/src/lazy/text/raw/struct.rs b/src/lazy/text/raw/struct.rs index f9dcaee5..e0af24fb 100644 --- a/src/lazy/text/raw/struct.rs +++ b/src/lazy/text/raw/struct.rs @@ -15,13 +15,13 @@ use crate::lazy::text::value::{LazyRawTextValue_1_0, RawTextAnnotationsIterator} use crate::{IonResult, RawSymbolTokenRef}; #[derive(Clone, Copy, Debug)] -pub struct RawTextStructIterator_1_0<'data> { - input: TextBufferView<'data>, +pub struct RawTextStructIterator_1_0<'top> { + input: TextBufferView<'top>, has_returned_error: bool, } -impl<'data> RawTextStructIterator_1_0<'data> { - pub(crate) fn new(input: TextBufferView<'data>) -> Self { +impl<'top> RawTextStructIterator_1_0<'top> { + pub(crate) fn new(input: TextBufferView<'top>) -> Self { RawTextStructIterator_1_0 { input, has_returned_error: false, @@ -67,8 +67,8 @@ impl<'data> RawTextStructIterator_1_0<'data> { } } -impl<'data> Iterator for RawTextStructIterator_1_0<'data> { - type Item = IonResult>; +impl<'top> Iterator for RawTextStructIterator_1_0<'top> { + type Item = IonResult>; fn next(&mut self) -> Option { if self.has_returned_error { @@ -93,16 +93,16 @@ impl<'data> Iterator for RawTextStructIterator_1_0<'data> { } #[derive(Clone, Copy, Debug)] -pub struct LazyRawTextField_1_0<'data> { - pub(crate) value: LazyRawTextValue_1_0<'data>, +pub struct LazyRawTextField_1_0<'top> { + pub(crate) value: LazyRawTextValue_1_0<'top>, } -impl<'data> LazyRawTextField_1_0<'data> { - pub(crate) fn new(value: LazyRawTextValue_1_0<'data>) -> Self { +impl<'top> LazyRawTextField_1_0<'top> { + pub(crate) fn new(value: LazyRawTextValue_1_0<'top>) -> Self { LazyRawTextField_1_0 { value } } - pub fn name(&self) -> RawSymbolTokenRef<'data> { + pub fn name(&self) -> RawSymbolTokenRef<'top> { // We're in a struct field, the field name _must_ be populated. // If it's not (or the field name is not a valid SID or UTF-8 string despite matching), // that's a bug. We can safely unwrap/expect here. @@ -124,46 +124,46 @@ impl<'data> LazyRawTextField_1_0<'data> { .expect("invalid struct field name") } - pub fn value(&self) -> LazyRawTextValue_1_0<'data> { + pub fn value(&self) -> LazyRawTextValue_1_0<'top> { self.value } - pub(crate) fn into_value(self) -> LazyRawTextValue_1_0<'data> { + pub(crate) fn into_value(self) -> LazyRawTextValue_1_0<'top> { self.value } } -impl<'data> LazyRawFieldPrivate<'data, TextEncoding_1_0> for LazyRawTextField_1_0<'data> { - fn into_value(self) -> LazyRawTextValue_1_0<'data> { +impl<'top> LazyRawFieldPrivate<'top, TextEncoding_1_0> for LazyRawTextField_1_0<'top> { + fn into_value(self) -> LazyRawTextValue_1_0<'top> { self.value } } -impl<'data> LazyRawField<'data, TextEncoding_1_0> for LazyRawTextField_1_0<'data> { - fn name(&self) -> RawSymbolTokenRef<'data> { +impl<'top> LazyRawField<'top, TextEncoding_1_0> for LazyRawTextField_1_0<'top> { + fn name(&self) -> RawSymbolTokenRef<'top> { LazyRawTextField_1_0::name(self) } - fn value(&self) -> LazyRawTextValue_1_0<'data> { + fn value(&self) -> LazyRawTextValue_1_0<'top> { self.value() } } #[derive(Clone, Copy, Debug)] -pub struct LazyRawTextStruct_1_0<'data> { - pub(crate) value: LazyRawTextValue_1_0<'data>, +pub struct LazyRawTextStruct_1_0<'top> { + pub(crate) value: LazyRawTextValue_1_0<'top>, } -impl<'data> LazyContainerPrivate<'data, TextEncoding_1_0> for LazyRawTextStruct_1_0<'data> { - fn from_value(value: LazyRawTextValue_1_0<'data>) -> Self { +impl<'top> LazyContainerPrivate<'top, TextEncoding_1_0> for LazyRawTextStruct_1_0<'top> { + fn from_value(value: LazyRawTextValue_1_0<'top>) -> Self { LazyRawTextStruct_1_0 { value } } } -impl<'data> LazyRawStruct<'data, TextEncoding_1_0> for LazyRawTextStruct_1_0<'data> { - type Iterator = RawTextStructIterator_1_0<'data>; +impl<'top> LazyRawStruct<'top, TextEncoding_1_0> for LazyRawTextStruct_1_0<'top> { + type Iterator = RawTextStructIterator_1_0<'top>; - fn annotations(&self) -> RawTextAnnotationsIterator<'data> { + fn annotations(&self) -> RawTextAnnotationsIterator<'top> { self.value.annotations() } @@ -175,9 +175,9 @@ impl<'data> LazyRawStruct<'data, TextEncoding_1_0> for LazyRawTextStruct_1_0<'da } } -impl<'data> IntoIterator for LazyRawTextStruct_1_0<'data> { - type Item = IonResult>; - type IntoIter = RawTextStructIterator_1_0<'data>; +impl<'top> IntoIterator for LazyRawTextStruct_1_0<'top> { + type Item = IonResult>; + type IntoIter = RawTextStructIterator_1_0<'top>; fn into_iter(self) -> Self::IntoIter { self.iter() @@ -190,10 +190,12 @@ mod tests { use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::IonResult; + use bumpalo::Bump as BumpAllocator; fn expect_struct_range(ion_data: &str, expected: Range) -> IonResult<()> { + let allocator = BumpAllocator::new(); let reader = &mut LazyRawTextReader_1_0::new(ion_data.as_bytes()); - let value = reader.next()?.expect_value()?; + let value = reader.next(&allocator)?.expect_value()?; let actual_range = value.matched.encoded_value.data_range(); assert_eq!( actual_range, expected, diff --git a/src/lazy/text/raw/v1_1/reader.rs b/src/lazy/text/raw/v1_1/reader.rs index 158ffddd..cd0baec4 100644 --- a/src/lazy/text/raw/v1_1/reader.rs +++ b/src/lazy/text/raw/v1_1/reader.rs @@ -1,5 +1,6 @@ #![allow(non_camel_case_types)] +use std::fmt; use std::fmt::{Debug, Formatter}; use std::ops::Range; @@ -11,21 +12,28 @@ use crate::lazy::decoder::{ LazyRawValueExpr, RawFieldExpr, RawValueExpr, }; use crate::lazy::encoding::TextEncoding_1_1; -use crate::lazy::raw_stream_item::RawStreamItem; +use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::{AddContext, ToIteratorOutput}; use crate::lazy::text::value::{LazyRawTextValue_1_1, RawTextAnnotationsIterator}; use crate::result::IonFailure; use crate::{IonResult, IonType}; +use crate::lazy::expanded::macro_evaluator::RawEExpression; +use crate::lazy::text::matched::MatchedValue; +use bumpalo::collections::Vec as BumpVec; +use bumpalo::Bump as BumpAllocator; + pub struct LazyRawTextReader_1_1<'data> { - // The current view of the data we're reading from. - buffer: TextBufferView<'data>, - // Each time something is parsed from the buffer successfully, the caller will mark the number - // of bytes that may be skipped the next time the reader advances. - bytes_to_skip: usize, + input: &'data [u8], + offset: usize, } +/// The index at which this macro can be found in the macro table. +pub type MacroAddress = usize; + +/// The index at which a value expression can be found within a template's body. +pub type TemplateBodyExprAddress = usize; #[derive(Copy, Clone, Debug, PartialEq)] pub enum MacroIdRef<'data> { LocalName(&'data str), @@ -34,38 +42,46 @@ pub enum MacroIdRef<'data> { } #[derive(Copy, Clone)] -pub struct RawTextMacroInvocation<'data> { +pub struct RawTextEExpression_1_1<'top> { pub(crate) encoded_expr: EncodedTextMacroInvocation, - pub(crate) input: TextBufferView<'data>, - pub(crate) id: MacroIdRef<'data>, + pub(crate) input: TextBufferView<'top>, + pub(crate) id: MacroIdRef<'top>, + pub(crate) arg_expr_cache: &'top [LazyRawValueExpr<'top, TextEncoding_1_1>], +} + +impl<'top> RawEExpression<'top, TextEncoding_1_1> for RawTextEExpression_1_1<'top> { + type RawArgumentsIterator<'a> = RawTextSequenceCacheIterator_1_1<'top> where Self: 'a; + + fn id(&self) -> MacroIdRef<'top> { + self.id + } + + fn raw_arguments(&self) -> Self::RawArgumentsIterator<'_> { + RawTextSequenceCacheIterator_1_1::new(self.arg_expr_cache) + } } -impl<'data> Debug for RawTextMacroInvocation<'data> { +impl<'data> Debug for RawTextEExpression_1_1<'data> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { // This is a text macro and the parser accepted it, so it's valid UTF-8. We can `unwrap()`. write!(f, "", self.input.as_text().unwrap()) } } -impl<'data> RawTextMacroInvocation<'data> { +impl<'top> RawTextEExpression_1_1<'top> { pub(crate) fn new( - id: MacroIdRef<'data>, + id: MacroIdRef<'top>, encoded_expr: EncodedTextMacroInvocation, - input: TextBufferView<'data>, + input: TextBufferView<'top>, + child_expr_cache: &'top [LazyRawValueExpr<'top, TextEncoding_1_1>], ) -> Self { Self { encoded_expr, input, id, + arg_expr_cache: child_expr_cache, } } - - /// Returns the slice of the input buffer that contains this macro expansion's arguments. - pub(crate) fn arguments_bytes(&self) -> TextBufferView<'data> { - const SMILEY_LENGTH: usize = 2; // The opening `(:` - self.input - .slice_to_end(SMILEY_LENGTH + self.encoded_expr.id_length as usize) - } } #[derive(Debug, Copy, Clone)] @@ -83,19 +99,23 @@ impl EncodedTextMacroInvocation { impl<'data> LazyRawReader<'data, TextEncoding_1_1> for LazyRawTextReader_1_1<'data> { fn new(data: &'data [u8]) -> Self { LazyRawTextReader_1_1 { - buffer: TextBufferView::new(data), - bytes_to_skip: 0, + input: data, + offset: 0, } } - fn next<'a>(&'a mut self) -> IonResult> { - let (buffer_after_whitespace, _whitespace) = self - .buffer + fn next<'top>( + &'top mut self, + allocator: &'top BumpAllocator, + ) -> IonResult> + where + 'data: 'top, + { + let input = + TextBufferView::new_with_offset(allocator, &self.input[self.offset..], self.offset); + let (buffer_after_whitespace, _whitespace) = input .match_optional_comments_and_whitespace() - .with_context( - "reading v1.1 whitespace/comments at the top level", - self.buffer, - )?; + .with_context("reading v1.1 whitespace/comments at the top level", input)?; if buffer_after_whitespace.is_empty() { return Ok(RawStreamItem::EndOfStream); } @@ -117,37 +137,68 @@ impl<'data> LazyRawReader<'data, TextEncoding_1_1> for LazyRawTextReader_1_1<'da } // Since we successfully matched the next value, we'll update the buffer // so a future call to `next()` will resume parsing the remaining input. - self.buffer = remaining; + self.offset = remaining.offset(); Ok(matched_item) } } -#[derive(Debug, Copy, Clone)] -pub struct LazyRawTextList_1_1<'data> { - pub(crate) value: LazyRawTextValue_1_1<'data>, +#[derive(Copy, Clone)] +pub struct LazyRawTextList_1_1<'top> { + pub(crate) value: LazyRawTextValue_1_1<'top>, +} + +impl<'a> Debug for LazyRawTextList_1_1<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "[")?; + for value in self.iter() { + write!(f, "{:?}, ", value?.expect_value()?.read()?)?; + } + write!(f, "]").unwrap(); + + Ok(()) + } } #[derive(Debug, Copy, Clone)] -pub struct RawTextListIterator_1_1<'data> { - input: TextBufferView<'data>, +pub struct RawTextListIterator_1_1<'top> { + input: TextBufferView<'top>, // If this iterator has returned an error, it should return `None` forever afterwards has_returned_error: bool, } -impl<'data> RawTextListIterator_1_1<'data> { - pub(crate) fn new(input: TextBufferView<'data>) -> Self { +impl<'top> RawTextListIterator_1_1<'top> { + pub(crate) fn new(input: TextBufferView<'top>) -> Self { Self { input, has_returned_error: false, } } +} - pub(crate) fn find_span(&self) -> IonResult> { +/// Wraps a [`RawTextListIterator_1_1`] (which parses the body of a list) and caches the child +/// expressions the iterator yields along the way. Finally, returns a `Range` representing +/// the span of input bytes that the list occupies. +pub(crate) struct TextListSpanFinder_1_1<'top> { + pub(crate) allocator: &'top bumpalo::Bump, + pub(crate) iterator: RawTextListIterator_1_1<'top>, +} + +impl<'top> TextListSpanFinder_1_1<'top> { + pub(crate) fn find_span( + &self, + ) -> IonResult<( + Range, + &'top [LazyRawValueExpr<'top, TextEncoding_1_1>], + )> { // The input has already skipped past the opening delimiter. - let start = self.input.offset() - 1; - // We need to find the input slice containing the closing delimiter. It's either... - let input_after_last = if let Some(value_expr_result) = self.last() { - let value_expr = value_expr_result?; + let start = self.iterator.input.offset() - 1; + let mut child_expr_cache = BumpVec::new_in(self.allocator); + for expr_result in self.iterator { + let expr = expr_result?; + child_expr_cache.push(expr); + } + + let input_after_last_expr = if let Some(value_expr) = child_expr_cache.last() { // ...the input slice that follows the last sequence value... match value_expr { RawValueExpr::ValueLiteral(value) => value @@ -156,18 +207,21 @@ impl<'data> RawTextListIterator_1_1<'data> { .slice_to_end(value.matched.encoded_value.total_length()), RawValueExpr::MacroInvocation(invocation) => { let end_of_expr = invocation.input.offset() + invocation.input.len(); - let remaining = self.input.slice_to_end(end_of_expr - self.input.offset()); + let remaining = self + .iterator + .input + .slice_to_end(end_of_expr - self.iterator.input.offset()); remaining } } } else { - // ...or there aren't values, so it's just the input after the opening delimiter. - self.input + // ...or there weren't any child values, so it's just the input after the opening delimiter. + self.iterator.input }; - let (mut input_after_ws, _ws) = - input_after_last - .match_optional_comments_and_whitespace() - .with_context("seeking the end of a v1.1 list", input_after_last)?; + + let (mut input_after_ws, _ws) = input_after_last_expr + .match_optional_comments_and_whitespace() + .with_context("seeking the end of a list", input_after_last_expr)?; // Skip an optional comma and more whitespace if input_after_ws.bytes().first() == Some(&b',') { (input_after_ws, _) = input_after_ws @@ -176,29 +230,46 @@ impl<'data> RawTextListIterator_1_1<'data> { .with_context("skipping a v1.1 list's trailing comma", input_after_ws)?; } let (input_after_end, _end_delimiter) = satisfy(|c| c == ']')(input_after_ws) - .with_context( - "seeking the closing delimiter of a v1.1 list", - input_after_ws, - )?; + .with_context("seeking the closing delimiter of a list", input_after_ws)?; let end = input_after_end.offset(); - Ok(start..end) + + let span = start..end; + Ok((span, child_expr_cache.into_bump_slice())) + } + pub fn new(allocator: &'top bumpalo::Bump, iterator: RawTextListIterator_1_1<'top>) -> Self { + Self { + allocator, + iterator, + } } } -#[derive(Debug, Copy, Clone)] -pub struct LazyRawTextSExp_1_1<'data> { - pub(crate) value: LazyRawTextValue_1_1<'data>, +#[derive(Copy, Clone)] +pub struct LazyRawTextSExp_1_1<'top> { + pub(crate) value: LazyRawTextValue_1_1<'top>, +} + +impl<'a> Debug for LazyRawTextSExp_1_1<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "(")?; + for value in self.iter() { + write!(f, "{:?} ", value?.expect_value()?.read()?)?; + } + write!(f, ")").unwrap(); + + Ok(()) + } } #[derive(Debug, Copy, Clone)] -pub struct RawTextSExpIterator_1_1<'data> { - input: TextBufferView<'data>, +pub struct RawTextSExpIterator_1_1<'top> { + input: TextBufferView<'top>, // If this iterator has returned an error, it should return `None` forever afterwards has_returned_error: bool, } -impl<'data> RawTextSExpIterator_1_1<'data> { - pub(crate) fn new(input: TextBufferView<'data>) -> Self { +impl<'top> RawTextSExpIterator_1_1<'top> { + pub(crate) fn new(input: TextBufferView<'top>) -> Self { Self { input, has_returned_error: false, @@ -207,114 +278,70 @@ impl<'data> RawTextSExpIterator_1_1<'data> { } #[derive(Debug, Copy, Clone)] -pub struct LazyRawTextStruct_1_1<'data> { - pub(crate) value: LazyRawTextValue_1_1<'data>, +pub struct RawTextSequenceCacheIterator_1_1<'top> { + child_exprs: &'top [LazyRawValueExpr<'top, TextEncoding_1_1>], + index: usize, } -#[derive(Debug, Copy, Clone)] -pub struct RawTextStructIterator_1_1<'data> { - input: TextBufferView<'data>, - has_returned_error: bool, -} - -impl<'data> RawTextStructIterator_1_1<'data> { - pub(crate) fn new(input: TextBufferView<'data>) -> Self { +impl<'top> RawTextSequenceCacheIterator_1_1<'top> { + pub fn new(child_exprs: &'top [LazyRawValueExpr<'top, TextEncoding_1_1>]) -> Self { Self { - input, - has_returned_error: false, + child_exprs, + index: 0, } } } -// ===== Trait implementations ===== +impl<'top> Iterator for RawTextSequenceCacheIterator_1_1<'top> { + type Item = IonResult>; -impl<'data> LazyContainerPrivate<'data, TextEncoding_1_1> for LazyRawTextList_1_1<'data> { - fn from_value(value: LazyRawTextValue_1_1<'data>) -> Self { - LazyRawTextList_1_1 { value } + fn next(&mut self) -> Option { + let next_expr = self.child_exprs.get(self.index)?; + self.index += 1; + Some(Ok(*next_expr)) } } -impl<'data> LazyRawSequence<'data, TextEncoding_1_1> for LazyRawTextList_1_1<'data> { - type Iterator = RawTextListIterator_1_1<'data>; - - fn annotations(&self) -> RawTextAnnotationsIterator<'data> { - self.value.annotations() - } - - fn ion_type(&self) -> IonType { - self.value.ion_type() - } - - fn iter(&self) -> Self::Iterator { - let open_bracket_index = - self.value.matched.encoded_value.data_offset() - self.value.matched.input.offset(); - // Make an iterator over the input bytes that follow the initial `[` - RawTextListIterator_1_1::new( - self.value - .matched - .input - .slice_to_end(open_bracket_index + 1), - ) - } - - fn as_value(&self) -> LazyRawTextValue_1_1<'data> { - self.value.matched.into() - } +/// Wraps a [`RawTextSExpIterator_1_1`] (which parses the body of a sexp) and caches the child +/// expressions the iterator yields along the way. Finally, returns a `Range` representing +/// the span of input bytes that the sexp occupies. +pub(crate) struct TextSExpSpanFinder_1_1<'top> { + pub(crate) allocator: &'top bumpalo::Bump, + pub(crate) iterator: RawTextSExpIterator_1_1<'top>, } -impl<'data> Iterator for RawTextListIterator_1_1<'data> { - type Item = IonResult>; - - fn next(&mut self) -> Option { - if self.has_returned_error { - return None; - } - match self.input.match_list_value_1_1() { - Ok((remaining, Some(value_expr))) => { - self.input = remaining; - Some(Ok(value_expr)) - } - Ok((_remaining, None)) => { - // Don't update `remaining` so subsequent calls will continue to return None - None - } - Err(e) => { - self.has_returned_error = true; - e.with_context("reading the next list value", self.input) - .transpose() - } +impl<'top> TextSExpSpanFinder_1_1<'top> { + pub fn new(allocator: &'top bumpalo::Bump, iterator: RawTextSExpIterator_1_1<'top>) -> Self { + Self { + allocator, + iterator, } } -} - -impl<'data> LazyRawTextSExp_1_1<'data> { - pub fn ion_type(&self) -> IonType { - IonType::SExp - } - - pub fn iter(&self) -> RawTextSExpIterator_1_1<'data> { - let open_paren_index = - self.value.matched.encoded_value.data_offset() - self.value.matched.input.offset(); - // Make an iterator over the input bytes that follow the initial `(` - RawTextSExpIterator_1_1::new(self.value.matched.input.slice_to_end(open_paren_index + 1)) - } -} -// TODO: This impl is very similar to the 1.0 impl; see if we can DRY it up. -impl<'data> RawTextSExpIterator_1_1<'data> { /// Scans ahead to find the end of this s-expression and reports the input span that it occupies. + /// As it scans, it records lazy references to the S-expression's child expressions. /// /// The `initial_bytes_skipped` parameter indicates how many bytes of input that represented the /// beginning of the expression are not in the buffer. For plain s-expressions, this will always /// be `1` as they begin with a single open parenthesis `(`. For e-expressions (which are used /// to invoke macros from the data stream), it will always be a minimum of `3`: two bytes for /// the opening `(:` and at least one for the macro identifier. (For example: `(:foo`.) - pub(crate) fn find_span(&self, initial_bytes_skipped: usize) -> IonResult> { + pub(crate) fn find_span( + &self, + initial_bytes_skipped: usize, + ) -> IonResult<( + Range, + &'top [LazyRawValueExpr<'top, TextEncoding_1_1>], + )> { // The input has already skipped past the opening delimiter. - let start = self.input.offset() - initial_bytes_skipped; - // We need to find the input slice containing the closing delimiter. It's either... - let input_after_last = if let Some(value_expr_result) = self.last() { - let value_expr = value_expr_result?; + let start = self.iterator.input.offset() - initial_bytes_skipped; + let mut child_expr_cache = BumpVec::new_in(self.allocator); + for expr_result in self.iterator { + let expr = expr_result?; + child_expr_cache.push(expr); + } + + let input_after_last_expr = if let Some(value_expr) = child_expr_cache.last() { // ...the input slice that follows the last sequence value... match value_expr { RawValueExpr::ValueLiteral(value) => value @@ -323,34 +350,40 @@ impl<'data> RawTextSExpIterator_1_1<'data> { .slice_to_end(value.matched.encoded_value.total_length()), RawValueExpr::MacroInvocation(invocation) => { let end_of_expr = invocation.input.offset() + invocation.input.len(); - let remaining = self.input.slice_to_end(end_of_expr - self.input.offset()); + let remaining = self + .iterator + .input + .slice_to_end(end_of_expr - self.iterator.input.offset()); remaining } } } else { - // ...or there aren't values, so it's just the input after the opening delimiter. - self.input + // ...or there weren't any child values, so it's just the input after the opening delimiter. + self.iterator.input }; - let (input_after_ws, _ws) = input_after_last + + let (input_after_ws, _ws) = input_after_last_expr .match_optional_comments_and_whitespace() - .with_context("seeking the end of a sexp", input_after_last)?; + .with_context("seeking the end of a sexp", input_after_last_expr)?; let (input_after_end, _end_delimiter) = satisfy(|c| c == ')')(input_after_ws) .with_context("seeking the closing delimiter of a sexp", input_after_ws)?; let end = input_after_end.offset(); - Ok(start..end) + + let span = start..end; + Ok((span, child_expr_cache.into_bump_slice())) } } -impl<'data> LazyContainerPrivate<'data, TextEncoding_1_1> for LazyRawTextSExp_1_1<'data> { - fn from_value(value: LazyRawTextValue_1_1<'data>) -> Self { +impl<'top> LazyContainerPrivate<'top, TextEncoding_1_1> for LazyRawTextSExp_1_1<'top> { + fn from_value(value: LazyRawTextValue_1_1<'top>) -> Self { LazyRawTextSExp_1_1 { value } } } -impl<'data> LazyRawSequence<'data, TextEncoding_1_1> for LazyRawTextSExp_1_1<'data> { - type Iterator = RawTextSExpIterator_1_1<'data>; +impl<'top> LazyRawSequence<'top, TextEncoding_1_1> for LazyRawTextSExp_1_1<'top> { + type Iterator = RawTextSequenceCacheIterator_1_1<'top>; - fn annotations(&self) -> RawTextAnnotationsIterator<'data> { + fn annotations(&self) -> RawTextAnnotationsIterator<'top> { self.value.annotations() } @@ -359,20 +392,19 @@ impl<'data> LazyRawSequence<'data, TextEncoding_1_1> for LazyRawTextSExp_1_1<'da } fn iter(&self) -> Self::Iterator { - // Make an iterator over the input bytes that follow the initial `(`; account for - // a leading field name and/or annotations. - let open_paren_index = - self.value.matched.encoded_value.data_offset() - self.value.matched.input.offset(); - RawTextSExpIterator_1_1::new(self.value.matched.input.slice_to_end(open_paren_index + 1)) + let MatchedValue::SExp(child_exprs) = self.value.matched.encoded_value.matched() else { + unreachable!("s-expression contained a matched value of the wrong type") + }; + RawTextSequenceCacheIterator_1_1::new(child_exprs) } - fn as_value(&self) -> LazyRawTextValue_1_1<'data> { + fn as_value(&self) -> LazyRawTextValue_1_1<'top> { self.value.matched.into() } } -impl<'data> Iterator for RawTextSExpIterator_1_1<'data> { - type Item = IonResult>; +impl<'top> Iterator for RawTextSExpIterator_1_1<'top> { + type Item = IonResult>; fn next(&mut self) -> Option { if self.has_returned_error { @@ -393,29 +425,155 @@ impl<'data> Iterator for RawTextSExpIterator_1_1<'data> { } } -impl<'data> LazyContainerPrivate<'data, TextEncoding_1_1> for LazyRawTextStruct_1_1<'data> { - fn from_value(value: LazyRawTextValue_1_1<'data>) -> Self { +#[derive(Copy, Clone)] +pub struct LazyRawTextStruct_1_1<'top> { + pub(crate) value: LazyRawTextValue_1_1<'top>, +} + +impl<'a> Debug for LazyRawTextStruct_1_1<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{{")?; + for field in self.iter() { + match field? { + LazyRawFieldExpr::::NameValuePair( + name, + RawValueExpr::ValueLiteral(value), + ) => write!(f, "{name:?}: {value:?}, "), + LazyRawFieldExpr::::NameValuePair( + name, + RawValueExpr::MacroInvocation(invocation), + ) => write!(f, "{name:?}: {invocation:?}, "), + LazyRawFieldExpr::::MacroInvocation(invocation) => { + write!(f, "{invocation:?}, ") + } + }?; + } + write!(f, "}}").unwrap(); + + Ok(()) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct RawTextStructIterator_1_1<'top> { + input: TextBufferView<'top>, + has_returned_error: bool, +} + +impl<'top> RawTextStructIterator_1_1<'top> { + pub(crate) fn new(input: TextBufferView<'top>) -> Self { + Self { + input, + has_returned_error: false, + } + } +} + +#[derive(Debug, Copy, Clone)] +pub struct RawTextStructCacheIterator_1_1<'top> { + field_exprs: &'top [LazyRawFieldExpr<'top, TextEncoding_1_1>], + index: usize, +} + +impl<'top> RawTextStructCacheIterator_1_1<'top> { + pub fn new(field_exprs: &'top [LazyRawFieldExpr<'top, TextEncoding_1_1>]) -> Self { + Self { + field_exprs, + index: 0, + } + } +} + +impl<'top> Iterator for RawTextStructCacheIterator_1_1<'top> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + let next_expr = self.field_exprs.get(self.index)?; + self.index += 1; + // TODO: Remove the result wrapper + Some(Ok(next_expr.clone())) + } +} + +// ===== Trait implementations ===== + +impl<'top> LazyContainerPrivate<'top, TextEncoding_1_1> for LazyRawTextList_1_1<'top> { + fn from_value(value: LazyRawTextValue_1_1<'top>) -> Self { + LazyRawTextList_1_1 { value } + } +} + +impl<'top> LazyRawSequence<'top, TextEncoding_1_1> for LazyRawTextList_1_1<'top> { + type Iterator = RawTextSequenceCacheIterator_1_1<'top>; + + fn annotations(&self) -> RawTextAnnotationsIterator<'top> { + self.value.annotations() + } + + fn ion_type(&self) -> IonType { + self.value.ion_type() + } + + fn iter(&self) -> Self::Iterator { + let MatchedValue::List(child_exprs) = self.value.matched.encoded_value.matched() else { + unreachable!("list contained a matched value of the wrong type") + }; + RawTextSequenceCacheIterator_1_1::new(child_exprs) + } + + fn as_value(&self) -> LazyRawTextValue_1_1<'top> { + self.value.matched.into() + } +} + +impl<'top> Iterator for RawTextListIterator_1_1<'top> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + if self.has_returned_error { + return None; + } + match self.input.match_list_value_1_1() { + Ok((remaining, Some(value_expr))) => { + self.input = remaining; + Some(Ok(value_expr)) + } + Ok((_remaining, None)) => { + // Don't update `remaining` so subsequent calls will continue to return None + None + } + Err(e) => { + self.has_returned_error = true; + e.with_context("reading the next list value", self.input) + .transpose() + } + } + } +} + +impl<'top> LazyContainerPrivate<'top, TextEncoding_1_1> for LazyRawTextStruct_1_1<'top> { + fn from_value(value: LazyRawTextValue_1_1<'top>) -> Self { LazyRawTextStruct_1_1 { value } } } -impl<'data> LazyRawStruct<'data, TextEncoding_1_1> for LazyRawTextStruct_1_1<'data> { - type Iterator = RawTextStructIterator_1_1<'data>; +impl<'top> LazyRawStruct<'top, TextEncoding_1_1> for LazyRawTextStruct_1_1<'top> { + type Iterator = RawTextStructCacheIterator_1_1<'top>; - fn annotations(&self) -> RawTextAnnotationsIterator<'data> { + fn annotations(&self) -> RawTextAnnotationsIterator<'top> { self.value.annotations() } fn iter(&self) -> Self::Iterator { - let open_brace_index = - self.value.matched.encoded_value.data_offset() - self.value.matched.input.offset(); - // Slice the input to skip the opening `{` - RawTextStructIterator_1_1::new(self.value.matched.input.slice_to_end(open_brace_index + 1)) + let MatchedValue::Struct(field_exprs) = self.value.matched.encoded_value.matched() else { + unreachable!("struct contained a matched value of the wrong type") + }; + RawTextStructCacheIterator_1_1::new(field_exprs) } } -impl<'data> Iterator for RawTextStructIterator_1_1<'data> { - type Item = IonResult>; +impl<'top> Iterator for RawTextStructIterator_1_1<'top> { + type Item = IonResult>; fn next(&mut self) -> Option { if self.has_returned_error { @@ -436,7 +594,7 @@ impl<'data> Iterator for RawTextStructIterator_1_1<'data> { } } -impl<'data> RawTextStructIterator_1_1<'data> { +impl<'top> RawTextStructIterator_1_1<'top> { // TODO: DRY with RawTextStructIterator_1_0 pub(crate) fn find_span(&self) -> IonResult> { // The input has already skipped past the opening delimiter. @@ -480,17 +638,89 @@ impl<'data> RawTextStructIterator_1_1<'data> { } } +/// Wraps a [`RawTextStructIterator_1_1`] (which parses the body of a struct) and caches the field +/// expressions the iterator yields along the way. Finally, returns a `Range` representing +/// the span of input bytes that the struct occupies. +pub(crate) struct TextStructSpanFinder_1_1<'top> { + pub(crate) allocator: &'top bumpalo::Bump, + pub(crate) iterator: RawTextStructIterator_1_1<'top>, +} + +impl<'top> TextStructSpanFinder_1_1<'top> { + pub fn new(allocator: &'top bumpalo::Bump, iterator: RawTextStructIterator_1_1<'top>) -> Self { + Self { + allocator, + iterator, + } + } + + /// Scans ahead to find the end of this struct and reports the input span that it occupies. + /// As it scans, it records lazy references to the struct's field expressions. + pub(crate) fn find_span( + &self, + ) -> IonResult<( + Range, + &'top [LazyRawFieldExpr<'top, TextEncoding_1_1>], + )> { + // The input has already skipped past the opening delimiter. + let start = self.iterator.input.offset() - 1; + let mut child_expr_cache = BumpVec::new_in(self.allocator); + for expr_result in self.iterator { + let expr = expr_result?; + child_expr_cache.push(expr); + } + + // We need to find the input slice containing the closing delimiter. + let input_after_last = if let Some(field_result) = child_expr_cache.last() { + // If there are any field expressions, we need to isolate the input slice that follows + // the last one. + use RawFieldExpr::*; + match field_result { + // foo: bar + NameValuePair(_name, RawValueExpr::ValueLiteral(value)) => { + value.matched.input.slice_to_end(value.matched.encoded_value.total_length()) + }, + // foo: (:bar ...) + NameValuePair(_, RawValueExpr::MacroInvocation(invocation)) + // (:foo) + | MacroInvocation(invocation) => { + self.iterator.input.slice_to_end(invocation.input.len()) + } + } + } else { + // ...or there aren't fields, so it's just the input after the opening delimiter. + self.iterator.input + }; + let (mut input_after_ws, _ws) = + input_after_last + .match_optional_comments_and_whitespace() + .with_context("seeking the end of a struct", input_after_last)?; + // Skip an optional comma and more whitespace + if input_after_ws.bytes().first() == Some(&b',') { + (input_after_ws, _) = input_after_ws + .slice_to_end(1) + .match_optional_comments_and_whitespace() + .with_context("skipping a struct's trailing comma", input_after_ws)?; + } + let (input_after_end, _end_delimiter) = satisfy(|c| c == b'}' as char)(input_after_ws) + .with_context("seeking the closing delimiter of a struct", input_after_ws)?; + let end = input_after_end.offset(); + Ok((start..end, child_expr_cache.into_bump_slice())) + } +} + #[cfg(test)] mod tests { use super::*; use crate::lazy::raw_value_ref::RawValueRef; - fn expect_next<'data>( - reader: &mut LazyRawTextReader_1_1<'data>, - expected: RawValueRef<'data, TextEncoding_1_1>, + fn expect_next<'top, 'data: 'top>( + allocator: &'top BumpAllocator, + reader: &'top mut LazyRawTextReader_1_1<'data>, + expected: RawValueRef<'top, TextEncoding_1_1>, ) { let lazy_value = reader - .next() + .next(allocator) .expect("advancing the reader failed") .expect_value() .expect("expected a value"); @@ -514,16 +744,21 @@ mod tests { false "#; + let allocator = BumpAllocator::new(); let reader = &mut LazyRawTextReader_1_1::new(data.as_bytes()); // $ion_1_1 - assert_eq!(reader.next()?.expect_ivm()?, (1, 1)); + assert_eq!(reader.next(&allocator)?.expect_ivm()?, (1, 1)); // "foo" - expect_next(reader, RawValueRef::String("foo".into())); + expect_next(&allocator, reader, RawValueRef::String("foo".into())); // bar - expect_next(reader, RawValueRef::Symbol("bar".into())); + expect_next(&allocator, reader, RawValueRef::Symbol("bar".into())); // (baz null.string) - let sexp = reader.next()?.expect_value()?.read()?.expect_sexp()?; + let sexp = reader + .next(&allocator)? + .expect_value()? + .read()? + .expect_sexp()?; let mut children = sexp.iter(); assert_eq!( children.next().unwrap()?.expect_value()?.read()?, @@ -535,10 +770,10 @@ mod tests { ); assert!(children.next().is_none()); // (:quux quuz) - let macro_invocation = reader.next()?.expect_macro_invocation()?; + let macro_invocation = reader.next(&allocator)?.expect_macro_invocation()?; assert_eq!(macro_invocation.id, MacroIdRef::LocalName("quux")); - expect_next(reader, RawValueRef::Int(77.into())); - expect_next(reader, RawValueRef::Bool(false)); + expect_next(&allocator, reader, RawValueRef::Int(77.into())); + expect_next(&allocator, reader, RawValueRef::Bool(false)); Ok(()) } } diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs index 8b5cbaaa..3373b832 100644 --- a/src/lazy/text/value.rs +++ b/src/lazy/text/value.rs @@ -2,10 +2,9 @@ use std::fmt; use std::fmt::{Debug, Formatter}; -use std::marker::PhantomData; use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawValuePrivate}; -use crate::lazy::decoder::LazyRawValue; +use crate::lazy::decoder::{LazyDecoder, LazyRawValue}; use crate::lazy::encoding::{TextEncoding, TextEncoding_1_0, TextEncoding_1_1}; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::text::buffer::TextBufferView; @@ -23,12 +22,12 @@ use crate::{IonResult, IonType, RawSymbolTokenRef}; /// includes a text definition for these items whenever one exists, see /// [`crate::lazy::value::LazyValue`]. #[derive(Copy, Clone)] -pub struct MatchedRawTextValue<'data> { - pub(crate) encoded_value: EncodedTextValue, - pub(crate) input: TextBufferView<'data>, +pub struct MatchedRawTextValue<'top, E: TextEncoding<'top>> { + pub(crate) encoded_value: EncodedTextValue<'top, E>, + pub(crate) input: TextBufferView<'top>, } -impl<'a> Debug for MatchedRawTextValue<'a> { +impl<'top, E: TextEncoding<'top>> Debug for MatchedRawTextValue<'top, E> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( f, @@ -42,24 +41,20 @@ impl<'a> Debug for MatchedRawTextValue<'a> { // // These types provide Ion-version-specific impls of the LazyRawValue trait #[derive(Copy, Clone)] -pub struct LazyRawTextValue<'data, E: TextEncoding<'data> + Copy> { - pub(crate) matched: MatchedRawTextValue<'data>, - spooky: PhantomData, +pub struct LazyRawTextValue<'top, E: TextEncoding<'top> + Copy> { + pub(crate) matched: MatchedRawTextValue<'top, E>, } -impl<'data, E: TextEncoding<'data>> LazyRawTextValue<'data, E> { - pub fn new(matched: MatchedRawTextValue<'data>) -> Self { - Self { - matched, - spooky: PhantomData, - } +impl<'top, E: TextEncoding<'top>> LazyRawTextValue<'top, E> { + pub fn new(matched: MatchedRawTextValue<'top, E>) -> Self { + Self { matched } } } -pub type LazyRawTextValue_1_0<'data> = LazyRawTextValue<'data, TextEncoding_1_0>; -pub type LazyRawTextValue_1_1<'data> = LazyRawTextValue<'data, TextEncoding_1_1>; +pub type LazyRawTextValue_1_0<'top> = LazyRawTextValue<'top, TextEncoding_1_0>; +pub type LazyRawTextValue_1_1<'top> = LazyRawTextValue<'top, TextEncoding_1_1>; -impl<'data, E: TextEncoding<'data>> Debug for LazyRawTextValue<'data, E> { +impl<'top, E: TextEncoding<'top>> Debug for LazyRawTextValue<'top, E> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "{}", E::name())?; @@ -73,37 +68,31 @@ impl<'data, E: TextEncoding<'data>> Debug for LazyRawTextValue<'data, E> { } } -impl<'data> From> for LazyRawTextValue_1_0<'data> { - fn from(matched: MatchedRawTextValue<'data>) -> Self { +impl<'top> From> for LazyRawTextValue_1_0<'top> { + fn from(matched: MatchedRawTextValue<'top, TextEncoding_1_0>) -> Self { LazyRawTextValue::new(matched) } } -impl<'data> From> for LazyRawTextValue_1_1<'data> { - fn from(matched: MatchedRawTextValue<'data>) -> Self { +impl<'top> From> for LazyRawTextValue_1_1<'top> { + fn from(matched: MatchedRawTextValue<'top, TextEncoding_1_1>) -> Self { LazyRawTextValue::new(matched) } } -impl<'data> LazyRawValuePrivate<'data> for MatchedRawTextValue<'data> { +impl<'top, E: TextEncoding<'top>> LazyRawValuePrivate<'top> for MatchedRawTextValue<'top, E> { // TODO: We likely want to move this functionality to the Ion-version-specific LazyDecoder::Field // implementations. See: https://github.com/amazon-ion/ion-rust/issues/631 - fn field_name(&self) -> IonResult> { + fn field_name(&self) -> IonResult> { self.encoded_value.field_name(self.input) } } -impl<'data> MatchedRawTextValue<'data> { - /// No-op compiler hint that we want a particular generic flavor of MatchedRawTextValue - pub(crate) fn as_version>(&self) -> &impl LazyRawValue<'data, D> { - self - } -} // ===== Ion-version-agnostic functionality ===== // // These trait impls are common to all Ion versions, but require the caller to specify a type // parameter. -impl<'data, D: TextEncoding<'data>> LazyRawValue<'data, D> for MatchedRawTextValue<'data> { +impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for MatchedRawTextValue<'top, E> { fn ion_type(&self) -> IonType { self.encoded_value.ion_type() } @@ -112,7 +101,7 @@ impl<'data, D: TextEncoding<'data>> LazyRawValue<'data, D> for MatchedRawTextVal self.encoded_value.is_null() } - fn annotations(&self) -> D::AnnotationsIterator { + fn annotations(&self) -> ::AnnotationsIterator<'top> { let span = self .encoded_value .annotations_range() @@ -123,7 +112,7 @@ impl<'data, D: TextEncoding<'data>> LazyRawValue<'data, D> for MatchedRawTextVal RawTextAnnotationsIterator::new(annotations_bytes) } - fn read(&self) -> IonResult> { + fn read(&self) -> IonResult> { let matched_input = self.input.slice( self.encoded_value.data_offset() - self.input.offset(), self.encoded_value.data_length(), @@ -141,35 +130,35 @@ impl<'data, D: TextEncoding<'data>> LazyRawValue<'data, D> for MatchedRawTextVal Symbol(s) => RawValueRef::Symbol(s.read(matched_input)?), Blob(b) => RawValueRef::Blob(b.read(matched_input)?), Clob(c) => RawValueRef::Clob(c.read(matched_input)?), - List => RawValueRef::List(D::List::from_value(D::value_from_matched(*self))), - SExp => RawValueRef::SExp(D::SExp::from_value(D::value_from_matched(*self))), - Struct => RawValueRef::Struct(D::Struct::from_value(D::value_from_matched(*self))), + List(_) => RawValueRef::List(E::List::<'top>::from_value(E::value_from_matched(*self))), + SExp(_) => RawValueRef::SExp(E::SExp::<'top>::from_value(E::value_from_matched(*self))), + Struct(_) => RawValueRef::Struct(E::Struct::from_value(E::value_from_matched(*self))), }; Ok(value_ref) } } -impl<'data, E: TextEncoding<'data>> LazyRawValuePrivate<'data> for LazyRawTextValue<'data, E> { - fn field_name(&self) -> IonResult> { +impl<'top, E: TextEncoding<'top>> LazyRawValuePrivate<'top> for LazyRawTextValue<'top, E> { + fn field_name(&self) -> IonResult> { self.matched.field_name() } } -impl<'data, E: TextEncoding<'data>> LazyRawValue<'data, E> for LazyRawTextValue<'data, E> { +impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for LazyRawTextValue<'top, E> { fn ion_type(&self) -> IonType { - self.matched.as_version::().ion_type() + self.matched.ion_type() } fn is_null(&self) -> bool { - self.matched.as_version::().is_null() + self.matched.is_null() } - fn annotations(&self) -> RawTextAnnotationsIterator<'data> { - self.matched.as_version::().annotations() + fn annotations(&self) -> ::AnnotationsIterator<'top> { + self.matched.annotations() } - fn read(&self) -> IonResult> { - self.matched.as_version::().read() + fn read(&self) -> IonResult> { + self.matched.read() } } @@ -178,8 +167,8 @@ pub struct RawTextAnnotationsIterator<'data> { has_returned_error: bool, } -impl<'data> RawTextAnnotationsIterator<'data> { - pub(crate) fn new(input: TextBufferView<'data>) -> Self { +impl<'top> RawTextAnnotationsIterator<'top> { + pub(crate) fn new(input: TextBufferView<'top>) -> Self { RawTextAnnotationsIterator { input, has_returned_error: false, @@ -187,8 +176,8 @@ impl<'data> RawTextAnnotationsIterator<'data> { } } -impl<'data> Iterator for RawTextAnnotationsIterator<'data> { - type Item = IonResult>; +impl<'top> Iterator for RawTextAnnotationsIterator<'top> { + type Item = IonResult>; fn next(&mut self) -> Option { if self.has_returned_error || self.input.is_empty() { @@ -218,6 +207,8 @@ impl<'data> Iterator for RawTextAnnotationsIterator<'data> { #[cfg(test)] mod tests { + use bumpalo::Bump as BumpAllocator; + use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::value::RawTextAnnotationsIterator; use crate::{IonResult, RawSymbolTokenRef}; @@ -225,7 +216,8 @@ mod tests { #[test] fn iterate_annotations() -> IonResult<()> { fn test(input: &str) -> IonResult<()> { - let input = TextBufferView::new(input.as_bytes()); + let allocator = BumpAllocator::new(); + let input = TextBufferView::new(&allocator, input.as_bytes()); let mut iter = RawTextAnnotationsIterator::new(input); assert_eq!(iter.next().unwrap()?, RawSymbolTokenRef::Text("foo".into())); assert_eq!(iter.next().unwrap()?, RawSymbolTokenRef::Text("bar".into())); diff --git a/src/lazy/value.rs b/src/lazy/value.rs index d1343974..b753c868 100644 --- a/src/lazy/value.rs +++ b/src/lazy/value.rs @@ -54,16 +54,14 @@ use crate::{ ///# } /// ``` #[derive(Clone)] -pub struct LazyValue<'top, 'data, D: LazyDecoder<'data>> { - pub(crate) expanded_value: LazyExpandedValue<'top, 'data, D>, +pub struct LazyValue<'top, D: LazyDecoder> { + pub(crate) expanded_value: LazyExpandedValue<'top, D>, } -pub type LazyBinaryValue<'top, 'data> = LazyValue<'top, 'data, BinaryEncoding_1_0>; +pub type LazyBinaryValue<'top> = LazyValue<'top, BinaryEncoding_1_0>; -impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyValue<'top, 'data, D> { - pub(crate) fn new( - expanded_value: LazyExpandedValue<'top, 'data, D>, - ) -> LazyValue<'top, 'data, D> { +impl<'top, D: LazyDecoder> LazyValue<'top, D> { + pub(crate) fn new(expanded_value: LazyExpandedValue<'top, D>) -> LazyValue<'top, D> { LazyValue { expanded_value } } @@ -125,7 +123,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyValue<'top, 'data, D> { ///# Ok(()) ///# } /// ``` - pub fn annotations(&self) -> AnnotationsIterator<'top, 'data, D> { + pub fn annotations(&self) -> AnnotationsIterator<'top, D> { AnnotationsIterator { expanded_annotations: self.expanded_value.annotations(), symbol_table: self.expanded_value.context.symbol_table, @@ -159,7 +157,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyValue<'top, 'data, D> { ///# Ok(()) ///# } /// ``` - pub fn read(&self) -> IonResult> { + pub fn read(&self) -> IonResult> { use ExpandedValueRef::*; let value_ref = match self.expanded_value.read()? { @@ -208,10 +206,10 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> LazyValue<'top, 'data, D> { } } -impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Element { +impl<'top, D: LazyDecoder> TryFrom> for Element { type Error = IonError; - fn try_from(value: LazyValue<'top, 'data, D>) -> Result { + fn try_from(value: LazyValue<'top, D>) -> Result { let annotations: Annotations = value.annotations().try_into()?; let value: Value = value.read()?.try_into()?; Ok(value.with_annotations(annotations)) @@ -220,12 +218,12 @@ impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for /// Iterates over a slice of bytes, lazily reading them as a sequence of symbol tokens encoded /// using the format described by generic type parameter `D`. -pub struct AnnotationsIterator<'top, 'data, D: LazyDecoder<'data>> { - pub(crate) expanded_annotations: ExpandedAnnotationsIterator<'top, 'data, D>, +pub struct AnnotationsIterator<'top, D: LazyDecoder> { + pub(crate) expanded_annotations: ExpandedAnnotationsIterator<'top, D>, pub(crate) symbol_table: &'top SymbolTable, } -impl<'top, 'data: 'top, D: LazyDecoder<'data>> AnnotationsIterator<'top, 'data, D> { +impl<'top, D: LazyDecoder> AnnotationsIterator<'top, D> { /// Returns `Ok(true)` if this annotations iterator matches the provided sequence exactly, or /// `Ok(false)` if not. If a decoding error occurs while visiting and resolving each annotation, /// returns an `Err(IonError)`. @@ -308,7 +306,7 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> AnnotationsIterator<'top, 'data, } } -impl<'top, 'data: 'top, D: LazyDecoder<'data>> Iterator for AnnotationsIterator<'top, 'data, D> { +impl<'top, D: LazyDecoder> Iterator for AnnotationsIterator<'top, D> { type Item = IonResult>; fn next(&mut self) -> Option { @@ -326,12 +324,10 @@ impl<'top, 'data: 'top, D: LazyDecoder<'data>> Iterator for AnnotationsIterator< } } -impl<'top, 'data: 'top, D: LazyDecoder<'data>> TryFrom> - for Annotations -{ +impl<'top, D: LazyDecoder> TryFrom> for Annotations { type Error = IonError; - fn try_from(iter: AnnotationsIterator<'top, 'data, D>) -> Result { + fn try_from(iter: AnnotationsIterator<'top, D>) -> Result { let annotations = iter .map(|symbol_ref| match symbol_ref { Ok(symbol_ref) => Ok(symbol_ref.to_owned()), diff --git a/src/lazy/value_ref.rs b/src/lazy/value_ref.rs index 2dce27a4..642767c1 100644 --- a/src/lazy/value_ref.rs +++ b/src/lazy/value_ref.rs @@ -15,7 +15,7 @@ use std::fmt::{Debug, Formatter}; /// Unlike a [Value], a `ValueRef` avoids heap allocation whenever possible, choosing to point instead /// to existing resources. Numeric values and timestamps are stored within the `ValueRef` itself. /// Text values and lobs hold references to either a slice of input data or text in the symbol table. -pub enum ValueRef<'top, 'data, D: LazyDecoder<'data>> { +pub enum ValueRef<'top, D: LazyDecoder> { Null(IonType), Bool(bool), Int(Int), @@ -26,12 +26,12 @@ pub enum ValueRef<'top, 'data, D: LazyDecoder<'data>> { Symbol(SymbolRef<'top>), Blob(BytesRef<'top>), Clob(BytesRef<'top>), - SExp(LazySExp<'top, 'data, D>), - List(LazyList<'top, 'data, D>), - Struct(LazyStruct<'top, 'data, D>), + SExp(LazySExp<'top, D>), + List(LazyList<'top, D>), + Struct(LazyStruct<'top, D>), } -impl<'top, 'data, D: LazyDecoder<'data>> PartialEq for ValueRef<'top, 'data, D> { +impl<'top, D: LazyDecoder> PartialEq for ValueRef<'top, D> { fn eq(&self, other: &Self) -> bool { use ValueRef::*; match (self, other) { @@ -52,7 +52,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> PartialEq for ValueRef<'top, 'data, D> } } -impl<'top, 'data, D: LazyDecoder<'data>> Debug for ValueRef<'top, 'data, D> { +impl<'top, D: LazyDecoder> Debug for ValueRef<'top, D> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { use ValueRef::*; match self { @@ -73,10 +73,10 @@ impl<'top, 'data, D: LazyDecoder<'data>> Debug for ValueRef<'top, 'data, D> { } } -impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Value { +impl<'top, D: LazyDecoder> TryFrom> for Value { type Error = IonError; - fn try_from(value: ValueRef<'top, 'data, D>) -> Result { + fn try_from(value: ValueRef<'top, D>) -> Result { use ValueRef::*; let value = match value { Null(ion_type) => Value::Null(ion_type), @@ -97,16 +97,16 @@ impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for V } } -impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Element { +impl<'top, D: LazyDecoder> TryFrom> for Element { type Error = IonError; - fn try_from(value_ref: ValueRef<'top, 'data, D>) -> Result { + fn try_from(value_ref: ValueRef<'top, D>) -> Result { let value: Value = value_ref.try_into()?; Ok(value.into()) } } -impl<'top, 'data, D: LazyDecoder<'data>> ValueRef<'top, 'data, D> { +impl<'top, D: LazyDecoder> ValueRef<'top, D> { pub fn expect_null(self) -> IonResult { if let ValueRef::Null(ion_type) = self { Ok(ion_type) @@ -195,7 +195,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> ValueRef<'top, 'data, D> { } } - pub fn expect_list(self) -> IonResult> { + pub fn expect_list(self) -> IonResult> { if let ValueRef::List(s) = self { Ok(s) } else { @@ -203,7 +203,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> ValueRef<'top, 'data, D> { } } - pub fn expect_sexp(self) -> IonResult> { + pub fn expect_sexp(self) -> IonResult> { if let ValueRef::SExp(s) = self { Ok(s) } else { @@ -211,7 +211,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> ValueRef<'top, 'data, D> { } } - pub fn expect_struct(self) -> IonResult> { + pub fn expect_struct(self) -> IonResult> { if let ValueRef::Struct(s) = self { Ok(s) } else {