From fa11f260f2f2fce8c9eface821626bded1e6e831 Mon Sep 17 00:00:00 2001 From: Richard Giliam Date: Mon, 20 May 2024 20:51:36 -0700 Subject: [PATCH] Add 1.1 binary reader support for decimals (#757) * Add 1.1 binary reader support for decimals * Address feedback * Add more unit tests, and add negative 0 encoding --- src/lazy/binary/raw/v1_1/reader.rs | 114 ++++++++++++++++++++ src/lazy/binary/raw/v1_1/type_descriptor.rs | 3 + src/lazy/binary/raw/v1_1/value.rs | 29 ++++- src/lazy/encoder/binary/v1_1/fixed_int.rs | 14 +++ 4 files changed, 158 insertions(+), 2 deletions(-) diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index e01a2c2c..2a56a982 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -170,6 +170,7 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1 mod tests { use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; use crate::{IonResult, IonType}; + use rstest::*; #[test] fn nop() -> IonResult<()> { @@ -418,6 +419,119 @@ mod tests { Ok(()) } + #[rstest] + #[case("0.", &[0x60])] + #[case("0d1", &[0x61, 0x03])] + #[case("0d63", &[0x61, 0x7F])] + #[case("0d64", &[0x62, 0x02, 0x01])] + #[case("0d99", &[0x62, 0x8E, 0x01])] + #[case("0.0", &[0x61, 0xFF])] + #[case("0.00", &[0x61, 0xFD])] + #[case("0.000", &[0x61, 0xFB])] + #[case("0d-64", &[0x61, 0x81])] + #[case("0d-99", &[0x62, 0x76, 0xFE])] + #[case("-0.", &[0x62, 0x01, 0x00])] + #[case("-0d1", &[0x62, 0x03, 0x00])] + #[case("-0d3", &[0x62, 0x07, 0x00])] + #[case("-0d63", &[0x62, 0x7F, 0x00])] + #[case("-0d199", &[0x63, 0x1E, 0x03, 0x00])] + #[case("-0d-1", &[0x62, 0xFF, 0x00])] + #[case("-0d-2", &[0x62, 0xFD, 0x00])] + #[case("-0d-3", &[0x62, 0xFB, 0x00])] + #[case("-0d-63", &[0x62, 0x83, 0x00])] + #[case("-0d-64", &[0x62, 0x81, 0x00])] + #[case("-0d-65", &[0x63, 0xFE, 0xFE, 0x00])] + #[case("-0d-199", &[0x63, 0xE6, 0xFC, 0x00])] + #[case("0.01", &[0x62, 0xFD, 0x01])] + #[case("0.1", &[0x62, 0xFF, 0x01])] + #[case("1d0", &[0x62, 0x01, 0x01])] + #[case("1d1", &[0x62, 0x03, 0x01])] + #[case("1d2", &[0x62, 0x05, 0x01])] + #[case("1d63", &[0x62, 0x7F, 0x01])] + #[case("1d64", &[0x63, 0x02, 0x01, 0x01])] + #[case("1d65536", &[0x64, 0x04, 0x00, 0x08, 0x01])] + #[case("2.", &[0x62, 0x01, 0x02])] + #[case("7.", &[0x62, 0x01, 0x07])] + #[case("14d0", &[0x62, 0x01, 0x0E])] + #[case("14d0", &[0x63, 0x02, 0x00, 0x0E])] // overpadded exponent + #[case("14d0", &[0x64, 0x01, 0x0E, 0x00, 0x00])] // Overpadded coefficient + #[case("14d0", &[0x65, 0x02, 0x00, 0x0E, 0x00, 0x00])] // Overpadded coefficient and exponent + #[case("1.0", &[0x62, 0xFF, 0x0A])] + #[case("1.00", &[0x62, 0xFD, 0x64])] + #[case("1.27", &[0x62, 0xFD, 0x7F])] + #[case("1.28", &[0x63, 0xFD, 0x80, 0x00])] + #[case("3.142", &[0x63, 0xFB, 0x46, 0x0C])] + #[case("3.14159", &[0x64, 0xF7, 0x2F, 0xCB, 0x04])] + #[case("3.1415927", &[0x65, 0xF3, 0x77, 0x5E, 0xDF, 0x01])] + #[case("3.141592653", &[0x66, 0xEF, 0x4D, 0xE6, 0x40, 0xBB, 0x00])] + #[case("3.141592653590", &[0x67, 0xE9, 0x16, 0x9F, 0x83, 0x75, 0xDB, 0x02])] + #[case("3.14159265358979323", &[0x69, 0xDF, 0xFB, 0xA0, 0x9E, 0xF6, 0x2F, 0x1E, 0x5C, 0x04])] + #[case("3.1415926535897932384626", &[0x6B, 0xD5, 0x72, 0x49, 0x64, 0xCC, 0xAF, 0xEF, 0x8F, 0x0F, 0xA7, 0x06])] + #[case("3.141592653589793238462643383", &[0x6D, 0xCB, 0xB7, 0x3C, 0x92, 0x86, 0x40, 0x9F, 0x1B, 0x01, 0x1F, 0xAA, 0x26, 0x0A])] + #[case("3.14159265358979323846264338327950", &[0x6F, 0xC1, 0x8E, 0x29, 0xE5, 0xE3, 0x56, 0xD5, 0xDF, 0xC5, 0x10, 0x8F, 0x55, 0x3F, 0x7D, 0x0F])] + #[case("3.141592653589793238462643383279503", &[0xF6, 0x21, 0xBF, 0x8F, 0x9F, 0xF3, 0xE6, 0x64, 0x55, 0xBE, 0xBA, 0xA7, 0x96, 0x57, 0x79, 0xE4, 0x9A, 0x00])] + fn decimals(#[case] expected_txt: &str, #[case] ion_data: &[u8]) -> IonResult<()> { + use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; + use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; + let bump = bumpalo::Bump::new(); + + let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); + let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); + + assert_eq!( + reader_bin + .next()? + .expect_value()? + .read()? + .expect_decimal()?, + reader_txt + .next(&bump)? + .expect_value()? + .read()? + .expect_decimal()?, + ); + Ok(()) + } + + #[rstest] + #[case("0.", &[0xF6, 0x01])] + #[case("0d99", &[0xF6, 0x05, 0x8E, 0x01])] + #[case("0.0", &[0xF6, 0x03, 0xFF])] + #[case("0.00", &[0xF6, 0x03, 0xFD])] + #[case("0d-99", &[0xF6, 0x05, 0x76, 0xFE])] + #[case("-0.", &[0xF6, 0x05, 0x01, 0x00])] + #[case("-0d199", &[0xF6, 0x07, 0x1E, 0x03, 0x00])] + #[case("-0d-1", &[0xF6, 0x05, 0xFF, 0x00])] + #[case("-0d-65", &[0xF6, 0x07, 0xFE, 0xFE, 0x00])] + #[case("0.01", &[0xF6, 0x05, 0xFD, 0x01])] + #[case("1.", &[0xF6, 0x05, 0x01, 0x01])] + #[case("1d65536", &[0xF6, 0x09, 0x04, 0x00, 0x08, 0x01])] + #[case("1.0", &[0xF6, 0x05, 0xFF, 0x0A])] + #[case("1.28", &[0xF6, 0x07, 0xFD, 0x80, 0x00])] + #[case("3.141592653590", &[0xF6, 0x0F, 0xE9, 0x16, 0x9F, 0x83, 0x75, 0xDB, 0x02])] + #[case("3.14159265358979323", &[0xF6, 0x13, 0xDF, 0xFB, 0xA0, 0x9E, 0xF6, 0x2F, 0x1E, 0x5C, 0x04])] + #[case("3.1415926535897932384626", &[0xF6, 0x17, 0xD5, 0x72, 0x49, 0x64, 0xCC, 0xAF, 0xEF, 0x8F, 0x0F, 0xA7, 0x06])] + #[case("3.141592653589793238462643383", &[0xF6, 0x1B, 0xCB, 0xB7, 0x3C, 0x92, 0x86, 0x40, 0x9F, 0x1B, 0x01, 0x1F, 0xAA, 0x26, 0x0A])] + #[case("3.14159265358979323846264338327950", &[0xF6, 0x1F, 0xC1, 0x8E, 0x29, 0xE5, 0xE3, 0x56, 0xD5, 0xDF, 0xC5, 0x10, 0x8F, 0x55, 0x3F, 0x7D, 0x0F])] + fn decimals_long(#[case] expected_txt: &str, #[case] ion_data: &[u8]) -> IonResult<()> { + use crate::ion_data::IonEq; + use crate::lazy::decoder::{LazyRawReader, LazyRawValue}; + use crate::lazy::text::raw::v1_1::reader::LazyRawTextReader_1_1; + let bump = bumpalo::Bump::new(); + + let mut reader_txt = LazyRawTextReader_1_1::new(expected_txt.as_bytes()); + let mut reader_bin = LazyRawBinaryReader_1_1::new(ion_data); + + let expected_value = reader_txt.next(&bump)?.expect_value()?.read()?; + let actual_value = reader_bin.next()?.expect_value()?.read()?; + + assert!(actual_value + .expect_decimal()? + .ion_eq(&expected_value.expect_decimal()?)); + + Ok(()) + } + fn blobs() -> IonResult<()> { let data: Vec = vec![ 0xe0, 0x01, 0x01, 0xea, // IVM diff --git a/src/lazy/binary/raw/v1_1/type_descriptor.rs b/src/lazy/binary/raw/v1_1/type_descriptor.rs index 96dda27e..32bf22a9 100644 --- a/src/lazy/binary/raw/v1_1/type_descriptor.rs +++ b/src/lazy/binary/raw/v1_1/type_descriptor.rs @@ -43,6 +43,7 @@ impl Opcode { (0x5, 0x0..=0x8) => (Integer, low_nibble, Some(IonType::Int)), (0x5, 0xA..=0xD) => (Float, low_nibble, Some(IonType::Float)), (0x5, 0xE..=0xF) => (Boolean, low_nibble, Some(IonType::Bool)), + (0x6, _) => (Decimal, low_nibble, Some(IonType::Decimal)), (0x8, _) => (String, low_nibble, Some(IonType::String)), (0x9, _) => (InlineSymbol, low_nibble, Some(IonType::Symbol)), (0xE, 0x0) => (IonVersionMarker, low_nibble, None), @@ -50,6 +51,7 @@ impl Opcode { (0xE, 0xA) => (NullNull, low_nibble, Some(IonType::Null)), (0xE, 0xC..=0xD) => (Nop, low_nibble, None), (0xF, 0x5) => (LargeInteger, low_nibble, Some(IonType::Int)), + (0xF, 0x6) => (Decimal, 0xFF, Some(IonType::Decimal)), (0xF, 0x8) => (String, 0xFF, Some(IonType::String)), // 0xFF indicates >15 byte string. (0xF, 0x9) => (InlineSymbol, 0xFF, Some(IonType::Symbol)), (0xF, 0xE) => (Blob, low_nibble, Some(IonType::Blob)), @@ -123,6 +125,7 @@ impl Header { (OpcodeType::String, 0..=15) => InOpcode(self.length_code), (OpcodeType::InlineSymbol, n) if n < 16 => InOpcode(n), (OpcodeType::SymbolAddress, n) if n < 4 => InOpcode(n), + (OpcodeType::Decimal, 0..=15) => InOpcode(self.length_code), _ => FlexUIntFollows, } } diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index d1b87e63..bf12db88 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -17,6 +17,7 @@ use crate::{ }, }, decoder::{LazyDecoder, LazyRawValue}, + encoder::binary::v1_1::fixed_int::FixedInt, encoding::BinaryEncoding_1_1, raw_value_ref::RawValueRef, }, @@ -213,7 +214,6 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// Helper method called by [`Self::read`]. Reads the current value as an int. fn read_int(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt; debug_assert!(self.encoded_value.ion_type() == IonType::Int); let header = &self.encoded_value.header(); @@ -262,7 +262,32 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// Helper method called by [`Self::read`]. Reads the current value as a decimal. fn read_decimal(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { - todo!(); + use crate::types::decimal::*; + + debug_assert!(self.encoded_value.ion_type() == IonType::Decimal); + let decimal: Decimal = if self.encoded_value.value_body_length == 0 { + Decimal::new(0, 0) + } else { + use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; + + let value_bytes = self.value_body()?; + let exponent = FlexInt::read(value_bytes, 0)?; + let coefficient_size = self.encoded_value.value_body_length - exponent.size_in_bytes(); + let coefficient = FixedInt::read( + &value_bytes[exponent.size_in_bytes()..], + coefficient_size, + 0, + )?; + + // Handle special -0 encoding. + if coefficient_size > 0 && coefficient.value().as_i64() == Some(0) { + Decimal::negative_zero_with_exponent(exponent.value()) + } else { + Decimal::new(coefficient, exponent.value()) + } + }; + + Ok(RawValueRef::Decimal(decimal)) } /// Helper method called by [`Self::read`]. Reads the current value as a timestamp. diff --git a/src/lazy/encoder/binary/v1_1/fixed_int.rs b/src/lazy/encoder/binary/v1_1/fixed_int.rs index b16efa0e..25562369 100644 --- a/src/lazy/encoder/binary/v1_1/fixed_int.rs +++ b/src/lazy/encoder/binary/v1_1/fixed_int.rs @@ -2,6 +2,7 @@ use std::io::Write; use num_bigint::BigInt; +use crate::decimal::coefficient::Coefficient; use crate::result::IonFailure; use crate::types::integer::IntData; use crate::{Int, IonResult}; @@ -99,6 +100,19 @@ impl From for Int { } } +impl From for Coefficient { + fn from(other: FixedInt) -> Self { + other.value.into() + } +} + +impl From for FixedInt { + fn from(other: i64) -> Self { + let encoded_size = FixedInt::encoded_size_i64(other); + FixedInt::new(encoded_size, other) + } +} + #[cfg(test)] mod tests { use num_bigint::BigInt;