From b02602222df2b32979a515a606b5cfd79e047e8b Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Wed, 1 May 2024 01:30:50 -0700 Subject: [PATCH 1/4] Add 1.1 binary reader support for decimals --- src/lazy/binary/raw/v1_1/reader.rs | 55 +++++++++++++++++++++ src/lazy/binary/raw/v1_1/type_descriptor.rs | 3 ++ src/lazy/binary/raw/v1_1/value.rs | 28 ++++++++++- src/lazy/encoder/binary/v1_1/fixed_int.rs | 14 ++++++ 4 files changed, 98 insertions(+), 2 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index e01a2c2c..11c6cdfc 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -418,6 +418,61 @@ mod tests { Ok(()) } + fn decimals() -> IonResult<()> { + use crate::types::decimal::Decimal; + + #[rustfmt::skip] + let data: Vec = vec![ + // IVM + 0xe0, 0x01, 0x01, 0xea, + + // 0d0 + 0x60, + + // 7d8 + 0x62, 0x01, 0x07, + + // 1.27 + 0xF6, 0x05, 0xFD, 0x7F, + + // 0d3 + 0x61, 0x07, + + // -0 + 0x62, 0x07, 0x00, + ]; + + let mut reader = LazyRawBinaryReader_1_1::new(&data); + let _ivm = reader.next()?.expect_ivm()?; + + assert_eq!( + reader.next()?.expect_value()?.read()?.expect_decimal()?, + 0.into() + ); + + assert_eq!( + reader.next()?.expect_value()?.read()?.expect_decimal()?, + 7.into() + ); + + assert_eq!( + reader.next()?.expect_value()?.read()?.expect_decimal()?, + 1.27f64.try_into()? + ); + + assert_eq!( + reader.next()?.expect_value()?.read()?.expect_decimal()?, + 0.0f64.try_into()? + ); + + assert_eq!( + reader.next()?.expect_value()?.read()?.expect_decimal()?, + Decimal::negative_zero() + ); + + Ok(()) + } + fn blobs() -> IonResult<()> { let data: Vec = vec![ 0xe0, 0x01, 0x01, 0xea, // IVM diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index 96dda27e..32bf22a9 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -43,6 +43,7 @@ impl Opcode { (0x5, 0x0..=0x8) => (Integer, low_nibble, Some(IonType::Int)), (0x5, 0xA..=0xD) => (Float, low_nibble, Some(IonType::Float)), (0x5, 0xE..=0xF) => (Boolean, low_nibble, Some(IonType::Bool)), + (0x6, _) => (Decimal, low_nibble, Some(IonType::Decimal)), (0x8, _) => (String, low_nibble, Some(IonType::String)), (0x9, _) => (InlineSymbol, low_nibble, Some(IonType::Symbol)), (0xE, 0x0) => (IonVersionMarker, low_nibble, None), @@ -50,6 +51,7 @@ impl Opcode { (0xE, 0xA) => (NullNull, low_nibble, Some(IonType::Null)), (0xE, 0xC..=0xD) => (Nop, low_nibble, None), (0xF, 0x5) => (LargeInteger, low_nibble, Some(IonType::Int)), + (0xF, 0x6) => (Decimal, 0xFF, Some(IonType::Decimal)), (0xF, 0x8) => (String, 0xFF, Some(IonType::String)), // 0xFF indicates >15 byte string. (0xF, 0x9) => (InlineSymbol, 0xFF, Some(IonType::Symbol)), (0xF, 0xE) => (Blob, low_nibble, Some(IonType::Blob)), @@ -123,6 +125,7 @@ impl Header { (OpcodeType::String, 0..=15) => InOpcode(self.length_code), (OpcodeType::InlineSymbol, n) if n < 16 => InOpcode(n), (OpcodeType::SymbolAddress, n) if n < 4 => InOpcode(n), + (OpcodeType::Decimal, 0..=15) => InOpcode(self.length_code), _ => FlexUIntFollows, } } diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index d1b87e63..fbd63a33 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -17,6 +17,7 @@ use crate::{ }, }, decoder::{LazyDecoder, LazyRawValue}, + encoder::binary::v1_1::fixed_int::FixedInt, encoding::BinaryEncoding_1_1, raw_value_ref::RawValueRef, }, @@ -213,7 +214,6 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// Helper method called by [`Self::read`]. Reads the current value as an int. fn read_int(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt; debug_assert!(self.encoded_value.ion_type() == IonType::Int); let header = &self.encoded_value.header(); @@ -262,7 +262,31 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// Helper method called by [`Self::read`]. Reads the current value as a decimal. fn read_decimal(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - todo!(); + use crate::types::decimal::*; + + debug_assert!(self.encoded_value.ion_type() == IonType::Decimal); + let length_code = self.encoded_value.header.length_code as usize; + let decimal: Decimal = if length_code == 0 { + Decimal::new(0, 0) + } else { + use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; + + let value_bytes = self.value_body()?; + let exponent = FlexInt::read(value_bytes, 0)?; + let coefficient_size = self.encoded_value.value_length() - exponent.size_in_bytes(); + let coefficient = if coefficient_size > 0 { + FixedInt::read( + &value_bytes[exponent.size_in_bytes()..], + coefficient_size, + 0, + )? + } else { + 0i64.into() + }; + Decimal::new(coefficient, exponent.value()) + }; + + Ok(RawValueRef::Decimal(decimal)) } /// Helper method called by [`Self::read`]. Reads the current value as a timestamp. diff --git a/src/lazy/encoder/binary/v1_1/fixed_int.rs b/src/lazy/encoder/binary/v1_1/fixed_int.rs index b16efa0e..25562369 100644 --- a/src/lazy/encoder/binary/v1_1/fixed_int.rs +++ b/src/lazy/encoder/binary/v1_1/fixed_int.rs @@ -2,6 +2,7 @@ use std::io::Write; use num_bigint::BigInt; +use crate::decimal::coefficient::Coefficient; use crate::result::IonFailure; use crate::types::integer::IntData; use crate::{Int, IonResult}; @@ -99,6 +100,19 @@ impl From for Int { } } +impl From for Coefficient { + fn from(other: FixedInt) -> Self { + other.value.into() + } +} + +impl From for FixedInt { + fn from(other: i64) -> Self { + let encoded_size = FixedInt::encoded_size_i64(other); + FixedInt::new(encoded_size, other) + } +} + #[cfg(test)] mod tests { use num_bigint::BigInt; From 69f1721d84af18ba3e96e85963918c75051ade81 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Thu, 9 May 2024 16:20:21 -0700 Subject: [PATCH 2/4] Address feedback --- src/lazy/binary/raw/v1_1/reader.rs | 118 +++++++++++++++++------------ src/lazy/binary/raw/v1_1/value.rs | 16 ++-- 2 files changed, 75 insertions(+), 59 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index 11c6cdfc..3c6141ac 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -170,6 +170,7 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 mod tests { use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; use crate::{IonResult, IonType}; + use rstest::*; #[test] fn nop() -> IonResult<()> { @@ -418,58 +419,77 @@ mod tests { Ok(()) } - fn decimals() -> IonResult<()> { - use crate::types::decimal::Decimal; - - #[rustfmt::skip] - let data: Vec = vec![ - // IVM - 0xe0, 0x01, 0x01, 0xea, - - // 0d0 - 0x60, - - // 7d8 - 0x62, 0x01, 0x07, - - // 1.27 - 0xF6, 0x05, 0xFD, 0x7F, - - // 0d3 - 0x61, 0x07, - - // -0 - 0x62, 0x07, 0x00, - ]; - - let mut reader = LazyRawBinaryReader_1_1::new(&data); - let _ivm = reader.next()?.expect_ivm()?; + #[rstest] + #[case("0.", &[0x60])] + #[case("0d1", &[0x61, 0x03])] + #[case("0d63", &[0x61, 0x7F])] + #[case("0d64", &[0x62, 0x02, 0x01])] + #[case("0d99", &[0x62, 0x8E, 0x01])] + #[case("0.0", &[0x61, 0xFF])] + #[case("0.00", &[0x61, 0xFD])] + #[case("0.000", &[0x61, 0xFB])] + #[case("0d-64", &[0x61, 0x81])] + #[case("0d-99", &[0x62, 0x76, 0xFE])] + #[case("-0.", &[0x62, 0x01, 0x00])] + #[case("-0d1", &[0x62, 0x03, 0x00])] + #[case("-0d3", &[0x62, 0x07, 0x00])] + #[case("-0d63", &[0x62, 0x7F, 0x00])] + #[case("-0d199", &[0x63, 0x1E, 0x03, 0x00])] + #[case("-0d-1", &[0x62, 0xFF, 0x00])] + #[case("-0d-2", &[0x62, 0xFD, 0x00])] + #[case("-0d-3", &[0x62, 0xFB, 0x00])] + #[case("-0d-63", &[0x62, 0x83, 0x00])] + #[case("-0d-64", &[0x62, 0x81, 0x00])] + #[case("-0d-65", &[0x63, 0xFE, 0xFE, 0x00])] + #[case("-0d-199", &[0x63, 0xE6, 0xFC, 0x00])] + #[case("0.01", &[0x62, 0xFD, 0x01])] + #[case("0.1", &[0x62, 0xFF, 0x01])] + #[case("1d0", &[0x62, 0x01, 0x01])] + #[case("1d1", &[0x62, 0x03, 0x01])] + #[case("1d2", &[0x62, 0x05, 0x01])] + #[case("1d63", &[0x62, 0x7F, 0x01])] + #[case("1d64", &[0x63, 0x02, 0x01, 0x01])] + #[case("1d65536", &[0x64, 0x04, 0x00, 0x08, 0x01])] + #[case("2.", &[0x62, 0x01, 0x02])] + #[case("7.", &[0x62, 0x01, 0x07])] + #[case("14d0", &[0x62, 0x01, 0x0E])] + #[case("14d0", &[0x63, 0x02, 0x00, 0x0E])] // overpadded exponent + #[case("14d0", &[0x64, 0x01, 0x0E, 0x00, 0x00])] // Overpadded coefficient + #[case("14d0", &[0x65, 0x02, 0x00, 0x0E, 0x00, 0x00])] // Overpadded coefficient and exponent + #[case("1.0", &[0x62, 0xFF, 0x0A])] + #[case("1.00", &[0x62, 0xFD, 0x64])] + #[case("1.27", &[0x62, 0xFD, 0x7F])] + #[case("1.28", &[0x63, 0xFD, 0x80, 0x00])] + #[case("3.142", &[0x63, 0xFB, 0x46, 0x0C])] + #[case("3.14159", &[0x64, 0xF7, 0x2F, 0xCB, 0x04])] + #[case("3.1415927", &[0x65, 0xF3, 0x77, 0x5E, 0xDF, 0x01])] + #[case("3.141592653", &[0x66, 0xEF, 0x4D, 0xE6, 0x40, 0xBB, 0x00])] + #[case("3.141592653590", &[0x67, 0xE9, 0x16, 0x9F, 0x83, 0x75, 0xDB, 0x02])] + #[case("3.14159265358979323", &[0x69, 0xDF, 0xFB, 0xA0, 0x9E, 0xF6, 0x2F, 0x1E, 0x5C, 0x04])] + #[case("3.1415926535897932384626", &[0x6B, 0xD5, 0x72, 0x49, 0x64, 0xCC, 0xAF, 0xEF, 0x8F, 0x0F, 0xA7, 0x06])] + #[case("3.141592653589793238462643383", &[0x6D, 0xCB, 0xB7, 0x3C, 0x92, 0x86, 0x40, 0x9F, 0x1B, 0x01, 0x1F, 0xAA, 0x26, 0x0A])] + #[case("3.14159265358979323846264338327950", &[0x6F, 0xC1, 0x8E, 0x29, 0xE5, 0xE3, 0x56, 0xD5, 0xDF, 0xC5, 0x10, 0x8F, 0x55, 0x3F, 0x7D, 0x0F])] + #[case("3.141592653589793238462643383279503", &[0xF6, 0x21, 0xBF, 0x8F, 0x9F, 0xF3, 0xE6, 0x64, 0x55, 0xBE, 0xBA, 0xA7, 0x96, 0x57, 0x79, 0xE4, 0x9A, 0x00])] + fn decimals(#[case] expected_txt: &str, #[case] ion_data: &[u8]) -> IonResult<()> { + use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; + use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; + let bump = bumpalo::Bump::new(); + + let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); + let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); assert_eq!( - reader.next()?.expect_value()?.read()?.expect_decimal()?, - 0.into() - ); - - assert_eq!( - reader.next()?.expect_value()?.read()?.expect_decimal()?, - 7.into() - ); - - assert_eq!( - reader.next()?.expect_value()?.read()?.expect_decimal()?, - 1.27f64.try_into()? + reader_bin + .next()? + .expect_value()? + .read()? + .expect_decimal()?, + reader_txt + .next(&bump)? + .expect_value()? + .read()? + .expect_decimal()?, ); - - assert_eq!( - reader.next()?.expect_value()?.read()?.expect_decimal()?, - 0.0f64.try_into()? - ); - - assert_eq!( - reader.next()?.expect_value()?.read()?.expect_decimal()?, - Decimal::negative_zero() - ); - Ok(()) } diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index fbd63a33..bfd94c07 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -273,16 +273,12 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { let value_bytes = self.value_body()?; let exponent = FlexInt::read(value_bytes, 0)?; - let coefficient_size = self.encoded_value.value_length() - exponent.size_in_bytes(); - let coefficient = if coefficient_size > 0 { - FixedInt::read( - &value_bytes[exponent.size_in_bytes()..], - coefficient_size, - 0, - )? - } else { - 0i64.into() - }; + let coefficient_size = self.encoded_value.value_body_length - exponent.size_in_bytes(); + let coefficient = FixedInt::read( + &value_bytes[exponent.size_in_bytes()..], + coefficient_size, + 0, + )?; Decimal::new(coefficient, exponent.value()) }; From 40e5279546587873d97edc2cf7d5b515e5be6a7a Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Fri, 10 May 2024 02:39:54 -0700 Subject: [PATCH 3/4] Add more unit tests, and add negative 0 encoding --- src/lazy/binary/raw/v1_1/reader.rs | 39 ++++++++++++++++++++++++++++++ src/lazy/binary/raw/v1_1/value.rs | 11 ++++++--- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index 3c6141ac..2a56a982 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -493,6 +493,45 @@ mod tests { Ok(()) } + #[rstest] + #[case("0.", &[0xF6, 0x01])] + #[case("0d99", &[0xF6, 0x05, 0x8E, 0x01])] + #[case("0.0", &[0xF6, 0x03, 0xFF])] + #[case("0.00", &[0xF6, 0x03, 0xFD])] + #[case("0d-99", &[0xF6, 0x05, 0x76, 0xFE])] + #[case("-0.", &[0xF6, 0x05, 0x01, 0x00])] + #[case("-0d199", &[0xF6, 0x07, 0x1E, 0x03, 0x00])] + #[case("-0d-1", &[0xF6, 0x05, 0xFF, 0x00])] + #[case("-0d-65", &[0xF6, 0x07, 0xFE, 0xFE, 0x00])] + #[case("0.01", &[0xF6, 0x05, 0xFD, 0x01])] + #[case("1.", &[0xF6, 0x05, 0x01, 0x01])] + #[case("1d65536", &[0xF6, 0x09, 0x04, 0x00, 0x08, 0x01])] + #[case("1.0", &[0xF6, 0x05, 0xFF, 0x0A])] + #[case("1.28", &[0xF6, 0x07, 0xFD, 0x80, 0x00])] + #[case("3.141592653590", &[0xF6, 0x0F, 0xE9, 0x16, 0x9F, 0x83, 0x75, 0xDB, 0x02])] + #[case("3.14159265358979323", &[0xF6, 0x13, 0xDF, 0xFB, 0xA0, 0x9E, 0xF6, 0x2F, 0x1E, 0x5C, 0x04])] + #[case("3.1415926535897932384626", &[0xF6, 0x17, 0xD5, 0x72, 0x49, 0x64, 0xCC, 0xAF, 0xEF, 0x8F, 0x0F, 0xA7, 0x06])] + #[case("3.141592653589793238462643383", &[0xF6, 0x1B, 0xCB, 0xB7, 0x3C, 0x92, 0x86, 0x40, 0x9F, 0x1B, 0x01, 0x1F, 0xAA, 0x26, 0x0A])] + #[case("3.14159265358979323846264338327950", &[0xF6, 0x1F, 0xC1, 0x8E, 0x29, 0xE5, 0xE3, 0x56, 0xD5, 0xDF, 0xC5, 0x10, 0x8F, 0x55, 0x3F, 0x7D, 0x0F])] + fn decimals_long(#[case] expected_txt: &str, #[case] ion_data: &[u8]) -> IonResult<()> { + use crate::ion_data::IonEq; + use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; + use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; + let bump = bumpalo::Bump::new(); + + let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); + let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); + + let expected_value = reader_txt.next(&bump)?.expect_value()?.read()?; + let actual_value = reader_bin.next()?.expect_value()?.read()?; + + assert!(actual_value + .expect_decimal()? + .ion_eq(&expected_value.expect_decimal()?)); + + Ok(()) + } + fn blobs() -> IonResult<()> { let data: Vec = vec![ 0xe0, 0x01, 0x01, 0xea, // IVM diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index bfd94c07..53d6af5e 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -265,8 +265,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { use crate::types::decimal::*; debug_assert!(self.encoded_value.ion_type() == IonType::Decimal); - let length_code = self.encoded_value.header.length_code as usize; - let decimal: Decimal = if length_code == 0 { + let decimal: Decimal = if self.encoded_value.value_body_length == 0 { Decimal::new(0, 0) } else { use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; @@ -279,7 +278,13 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { coefficient_size, 0, )?; - Decimal::new(coefficient, exponent.value()) + + // Handle special -0 encoding. + if coefficient_size > 0 && coefficient.value().as_i64().is_some_and(|c| c == 0) { + Decimal::negative_zero_with_exponent(exponent.value()) + } else { + Decimal::new(coefficient, exponent.value()) + } }; Ok(RawValueRef::Decimal(decimal)) From 14fa06a5354a70cacc37ddc45594ce9941be3e9a Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Fri, 10 May 2024 02:57:16 -0700 Subject: [PATCH 4/4] Well that was a silly idea; remove use of 1.70 function that simplified nothing to keep at 1.65 --- src/lazy/binary/raw/v1_1/value.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 53d6af5e..bf12db88 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -280,7 +280,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { )?; // Handle special -0 encoding. - if coefficient_size > 0 && coefficient.value().as_i64().is_some_and(|c| c == 0) { + if coefficient_size > 0 && coefficient.value().as_i64() == Some(0) { Decimal::negative_zero_with_exponent(exponent.value()) } else { Decimal::new(coefficient, exponent.value())