-
Notifications
You must be signed in to change notification settings - Fork 36
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adds the `(annotate ...)` form and bin read support for `flex_uint` parameters
#801
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
use crate::lazy::binary::raw::type_descriptor::Header; | ||
use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; | ||
use crate::lazy::expanded::template::ParameterEncoding; | ||
use crate::IonType; | ||
use std::ops::Range; | ||
|
||
|
@@ -40,6 +41,7 @@ impl EncodedHeader for Header { | |
/// without re-parsing its header information each time. | ||
#[derive(Clone, Copy, Debug, PartialEq)] | ||
pub(crate) struct EncodedValue<HeaderType: EncodedHeader> { | ||
pub(crate) encoding: ParameterEncoding, | ||
// If the compiler decides that a value is too large to be moved/copied with inline code, | ||
// it will relocate the value using memcpy instead. This can be quite slow by comparison. | ||
// | ||
|
@@ -88,6 +90,8 @@ pub(crate) struct EncodedValue<HeaderType: EncodedHeader> { | |
pub annotations_encoding: AnnotationsEncoding, | ||
// The offset of the type descriptor byte within the overall input stream. | ||
pub header_offset: usize, | ||
// If this value was written with a tagless encoding, this will be 0. Otherwise, it's 1. | ||
pub opcode_length: u8, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ Retrofitting support for encodings other than There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be derived (i.e. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I had the same thought mid-PR. I've opened #805 to track refactoring this. |
||
// The number of bytes used to encode the optional length VarUInt following the header byte. | ||
pub length_length: u8, | ||
// The number of bytes used to encode the value itself, not including the header byte | ||
|
@@ -258,12 +262,14 @@ mod tests { | |
use crate::lazy::binary::encoded_value::EncodedValue; | ||
use crate::lazy::binary::raw::type_descriptor::Header; | ||
use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; | ||
use crate::lazy::expanded::template::ParameterEncoding; | ||
use crate::{IonResult, IonType}; | ||
|
||
#[test] | ||
fn accessors() -> IonResult<()> { | ||
// 3-byte String with 1-byte annotation | ||
let value = EncodedValue { | ||
encoding: ParameterEncoding::Tagged, | ||
header: Header { | ||
ion_type: IonType::String, | ||
ion_type_code: IonTypeCode::String, | ||
|
@@ -273,6 +279,7 @@ mod tests { | |
annotations_sequence_length: 1, | ||
annotations_encoding: AnnotationsEncoding::SymbolAddress, | ||
header_offset: 200, | ||
opcode_length: 1, | ||
length_length: 0, | ||
value_body_length: 3, | ||
total_length: 7, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; | |
use crate::lazy::encoder::binary::v1_1::flex_sym::FlexSym; | ||
use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; | ||
use crate::lazy::expanded::macro_table::MacroRef; | ||
use crate::lazy::expanded::template::ParameterEncoding; | ||
use crate::lazy::expanded::EncodingContextRef; | ||
use crate::lazy::text::raw::v1_1::arg_group::EExpArgExpr; | ||
use crate::result::IonFailure; | ||
|
@@ -223,6 +224,38 @@ impl<'a> ImmutableBuffer<'a> { | |
Ok((flex_uint, remaining)) | ||
} | ||
|
||
/// Wraps the `FlexUInt` at the head of this buffer in a `LazyRawBinaryValue_1_1` without
/// keeping the decoded integer.
///
/// On success, returns the lazy value paired with the input that begins `size_in_bytes`
/// past the start of this buffer. Returns an "incomplete" result if no first byte can be
/// peeked or if the buffer holds fewer bytes than the encoded size.
pub fn read_flex_uint_as_lazy_value(self) -> ParseResult<'a, LazyRawBinaryValue_1_1<'a>> { | ||
let Some(first_byte) = self.peek_next_byte() else { | ||
return IonResult::incomplete("a flex_uint", self.offset()); | ||
}; | ||
// For a nonzero first byte, the encoded size is its number of trailing zero bits plus one.
let size_in_bytes = match first_byte { | ||
// If the first byte is zero, this flex_uint is encoded using 9+ bytes. That's pretty | ||
// uncommon, so we'll just use the existing logic in the `read` method and discard the | ||
// value. If this shows up in profiles, it can be optimized further. | ||
0 => FlexUInt::read(self.bytes(), self.offset())?.size_in_bytes(), | ||
_ => first_byte.trailing_zeros() as usize + 1, | ||
}; | ||
|
||
if self.len() < size_in_bytes { | ||
return IonResult::incomplete("reading a flex_uint value", self.offset()); | ||
} | ||
// XXX: This *doesn't* slice `self` because FlexUInt::read() is faster if the input | ||
// is at least the size of a u64. | ||
// NOTE(review): `for_flex_uint` records `input.len()` as the value's body and total
// length, so handing it the unsliced buffer makes those lengths cover all remaining
// input rather than `size_in_bytes` -- confirm nothing downstream relies on them.
let matched_input = self; | ||
let remaining_input = self.slice_to_end(size_in_bytes); | ||
Comment on lines
+242
to
+245
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ I have a fix for this waiting in the PR that follows this one. |
||
let value = LazyRawBinaryValue_1_1::for_flex_uint(matched_input); | ||
Ok((value, remaining_input)) | ||
} | ||
|
||
/// Returns a new buffer that starts `offset` bytes into this one and runs to its end.
///
/// The returned buffer's stream offset is this buffer's offset plus `offset`, and it
/// shares this buffer's context.
// NOTE(review): presumably `data` is a byte slice, in which case `offset` values larger
// than the buffer's length panic on the range index -- confirm callers bounds-check first.
pub fn slice_to_end(&self, offset: usize) -> ImmutableBuffer<'a> { | ||
ImmutableBuffer { | ||
data: &self.data[offset..], | ||
// stream offset + local offset | ||
offset: self.offset + offset, | ||
context: self.context, | ||
} | ||
} | ||
|
||
#[inline] | ||
pub fn read_flex_sym(self) -> ParseResult<'a, FlexSym<'a>> { | ||
let flex_sym = FlexSym::read(self.bytes(), self.offset())?; | ||
|
@@ -448,12 +481,15 @@ impl<'a> ImmutableBuffer<'a> { | |
+ value_length; | ||
|
||
let encoded_value = EncodedValue { | ||
encoding: ParameterEncoding::Tagged, | ||
header, | ||
// If applicable, these are populated by the caller: `read_annotated_value()` | ||
annotations_header_length: 0, | ||
annotations_sequence_length: 0, | ||
annotations_encoding: AnnotationsEncoding::SymbolAddress, | ||
header_offset, | ||
// This is a tagged value, so its opcode length is always 1 | ||
opcode_length: 1, | ||
length_length, | ||
value_body_length: value_length, | ||
total_length, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,13 +3,16 @@ | |
use std::fmt::Debug; | ||
use std::ops::Range; | ||
|
||
use crate::lazy::binary::raw::v1_1::immutable_buffer::AnnotationsEncoding; | ||
use crate::lazy::binary::raw::v1_1::r#struct::LazyRawBinaryStruct_1_1; | ||
use crate::lazy::binary::raw::v1_1::sequence::{LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1}; | ||
use crate::lazy::bytes_ref::BytesRef; | ||
use crate::lazy::decoder::{HasRange, HasSpan, RawVersionMarker}; | ||
use crate::lazy::expanded::template::ParameterEncoding; | ||
use crate::lazy::expanded::EncodingContextRef; | ||
use crate::lazy::span::Span; | ||
use crate::lazy::str_ref::StrRef; | ||
use crate::v1_1::FlexUInt; | ||
use crate::{ | ||
lazy::{ | ||
binary::{ | ||
|
@@ -132,6 +135,12 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 | |
} | ||
|
||
fn read(&self) -> IonResult<RawValueRef<'top, BinaryEncoding_1_1>> { | ||
if self.encoded_value.encoding == ParameterEncoding::FlexUInt { | ||
let flex_uint = FlexUInt::read(self.input.bytes(), self.input.offset())?; | ||
let int: Int = flex_uint.value().into(); | ||
return Ok(RawValueRef::Int(int)); | ||
} | ||
|
||
if self.is_null() { | ||
let ion_type = if self.encoded_value.header.ion_type_code == OpcodeType::TypedNull { | ||
let body = self.value_body(); | ||
|
@@ -176,6 +185,11 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 | |
&self, | ||
context: EncodingContextRef<'top>, | ||
) -> IonResult<ValueRef<'top, BinaryEncoding_1_1>> { | ||
if self.encoded_value.encoding == ParameterEncoding::FlexUInt { | ||
let flex_uint = FlexUInt::read(self.input.bytes(), self.input.offset())?; | ||
let int: Int = flex_uint.value().into(); | ||
return Ok(ValueRef::Int(int)); | ||
} | ||
if self.is_null() { | ||
return Ok(ValueRef::Null(self.ion_type())); | ||
} | ||
|
@@ -194,6 +208,12 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 | |
value: &'a LazyRawBinaryValue_1_1<'a>, | ||
context: EncodingContextRef<'a>, | ||
) -> IonResult<ValueRef<'a, BinaryEncoding_1_1>> { | ||
if value.encoded_value.encoding == ParameterEncoding::FlexUInt { | ||
let flex_uint = FlexUInt::read(value.input.bytes(), value.input.offset())?; | ||
let int: Int = flex_uint.value().into(); | ||
return Ok(ValueRef::Int(int)); | ||
} | ||
|
||
if value.is_null() { | ||
return Ok(ValueRef::Null(value.ion_type())); | ||
} | ||
|
@@ -246,6 +266,37 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1 | |
} | ||
|
||
impl<'top> LazyRawBinaryValue_1_1<'top> { | ||
/// Constructs a lazy raw binary value from an input buffer slice that has been found to contain | ||
/// a complete `FlexUInt`. | ||
///
/// The resulting value is marked with `ParameterEncoding::FlexUInt`, reports `IonType::Int`,
/// carries no annotations, and takes its header offset and lengths from `input`.
pub(crate) fn for_flex_uint(input: ImmutableBuffer<'top>) -> Self { | ||
let encoded_value = EncodedValue { | ||
encoding: ParameterEncoding::FlexUInt, | ||
header: Header { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll look into this as part of #805. |
||
// It is an int, that's true. | ||
ion_type: IonType::Int, | ||
// Nonsense values for now | ||
ion_type_code: OpcodeType::Nop, | ||
low_nibble: 0, | ||
}, | ||
|
||
// FlexUInts cannot have any annotations | ||
annotations_header_length: 0, | ||
annotations_sequence_length: 0, | ||
annotations_encoding: AnnotationsEncoding::SymbolAddress, | ||
|
||
header_offset: input.offset(), | ||
// No opcode byte and no length prefix are recorded for this encoding.
opcode_length: 0, | ||
length_length: 0, | ||
// Both lengths are taken from the buffer passed in, so they span all of `input`.
value_body_length: input.len(), | ||
total_length: input.len(), | ||
}; | ||
|
||
LazyRawBinaryValue_1_1 { | ||
encoded_value, | ||
input, | ||
} | ||
} | ||
|
||
/// Indicates the Ion data type of this value. Calling this method does not require additional | ||
/// parsing of the input stream. | ||
pub fn ion_type(&'top self) -> IonType { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -627,7 +627,7 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { | |
MacroIdRef::LocalAddress(_address) => { | ||
todo!("macros with addresses higher than 64"); | ||
} | ||
} | ||
}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ |
||
Ok(BinaryEExpWriter_1_1::new( | ||
self.allocator, | ||
self.encoding_buffer, | ||
|
@@ -832,6 +832,9 @@ impl<'value, 'top> BinaryAnnotatedValueWriter_1_1<'value, 'top> { | |
|
||
#[cfg(test)] | ||
mod tests { | ||
use num_traits::FloatConst; | ||
use rstest::rstest; | ||
|
||
use crate::ion_data::IonEq; | ||
use crate::lazy::encoder::annotate::{Annotatable, Annotated}; | ||
use crate::lazy::encoder::annotation_seq::AnnotationSeq; | ||
|
@@ -845,8 +848,6 @@ mod tests { | |
v1_1, Decimal, Element, Int, IonResult, IonType, Null, RawSymbolRef, SymbolId, Timestamp, | ||
Writer, | ||
}; | ||
use num_traits::FloatConst; | ||
use rstest::rstest; | ||
|
||
fn encoding_test( | ||
test: impl FnOnce(&mut LazyRawBinaryWriter_1_1<&mut Vec<u8>>) -> IonResult<()>, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🗺️ Rust v1.80.0 added some Clippy warnings around doc comment markdown formatting.