From 4e8739f75b737458a88cfea95b4972b3bf94313d Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Sat, 17 Aug 2024 09:27:11 -0400 Subject: [PATCH] Adds config option for writing struct field names as text/sid (#816) --- .../encoder/binary/v1_0/container_writers.rs | 6 +- src/lazy/encoder/binary/v1_0/value_writer.rs | 1 + .../encoder/binary/v1_1/container_writers.rs | 14 +- src/lazy/encoder/binary/v1_1/value_writer.rs | 19 +- src/lazy/encoder/text/v1_0/value_writer.rs | 8 +- src/lazy/encoder/text/v1_1/value_writer.rs | 6 +- src/lazy/encoder/value_writer.rs | 18 +- src/lazy/encoder/value_writer_config.rs | 49 ++- src/lazy/encoder/writer.rs | 292 +++++++++++++----- src/lazy/encoding.rs | 32 +- src/lazy/expanded/mod.rs | 4 - src/lazy/never.rs | 6 +- src/lazy/struct.rs | 7 + 13 files changed, 334 insertions(+), 128 deletions(-) diff --git a/src/lazy/encoder/binary/v1_0/container_writers.rs b/src/lazy/encoder/binary/v1_0/container_writers.rs index 19248a62..bd08027f 100644 --- a/src/lazy/encoder/binary/v1_0/container_writers.rs +++ b/src/lazy/encoder/binary/v1_0/container_writers.rs @@ -9,7 +9,7 @@ use crate::lazy::encoder::value_writer::{SequenceWriter, StructWriter}; use crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::raw_symbol_ref::AsRawSymbolRef; use crate::result::{EncodingError, IonFailure}; -use crate::{IonError, IonResult, RawSymbolRef, SymbolId}; +use crate::{v1_0, Encoding, IonError, IonResult, RawSymbolRef, SymbolId, ValueWriterConfig}; /// A helper type that holds fields and logic that is common to [`BinaryListWriter_1_0`], /// [`BinarySExpWriter_1_0`], and [`BinaryStructWriter_1_0`]. 
@@ -338,4 +338,8 @@ impl<'value, 'top> StructWriter for BinaryStructWriter_1_0<'value, 'top> { fn close(self) -> IonResult<()> { self.container_writer.end() } + + fn config(&self) -> ValueWriterConfig { + v1_0::Binary::default_value_writer_config() + } } diff --git a/src/lazy/encoder/binary/v1_0/value_writer.rs b/src/lazy/encoder/binary/v1_0/value_writer.rs index e485d71c..11dc704e 100644 --- a/src/lazy/encoder/binary/v1_0/value_writer.rs +++ b/src/lazy/encoder/binary/v1_0/value_writer.rs @@ -428,6 +428,7 @@ impl<'value, 'top> ValueWriter for BinaryAnnotatedValueWriter_1_0<'value, 'top> impl AsRef<[u8]> => write_clob, impl AsRef<[u8]> => write_blob, ); + fn list_writer(self) -> IonResult { BinaryListWriter_1_0::new(self.allocator, self.output_buffer) .with_annotations(self.annotations) diff --git a/src/lazy/encoder/binary/v1_1/container_writers.rs b/src/lazy/encoder/binary/v1_1/container_writers.rs index 4def4c95..5d743a48 100644 --- a/src/lazy/encoder/binary/v1_1/container_writers.rs +++ b/src/lazy/encoder/binary/v1_1/container_writers.rs @@ -8,7 +8,7 @@ use crate::lazy::encoder::value_writer::{EExpWriter, SequenceWriter, StructWrite use crate::lazy::encoder::value_writer_config::ValueWriterConfig; use crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::raw_symbol_ref::AsRawSymbolRef; -use crate::{IonResult, UInt}; +use crate::{v1_1, Encoding, IonResult, UInt}; /// A helper type that holds fields and logic that is common to [`BinaryListWriter_1_1`], /// [`BinarySExpWriter_1_1`], and [`BinaryStructWriter_1_1`]. 
@@ -23,7 +23,7 @@ pub(crate) struct BinaryContainerWriter_1_1<'value, 'top> { // An allocator reference that can be shared with nested container writers allocator: &'top BumpAllocator, encoder: ContainerEncodingKind<'value, 'top>, - write_options: ValueWriterConfig, + value_writer_config: ValueWriterConfig, } enum ContainerEncodingKind<'value, 'top> { @@ -69,7 +69,7 @@ impl<'value, 'top> BinaryContainerWriter_1_1<'value, 'top> { Self { allocator, encoder, - write_options, + value_writer_config: write_options, } } @@ -90,7 +90,7 @@ impl<'value, 'top> BinaryContainerWriter_1_1<'value, 'top> { Self { allocator, encoder, - write_options, + value_writer_config: write_options, } } @@ -108,7 +108,7 @@ impl<'value, 'top> BinaryContainerWriter_1_1<'value, 'top> { } pub fn config(&self) -> ValueWriterConfig { - self.write_options + self.value_writer_config } /// Constructs a new [`BinaryValueWriter_1_1`] using this [`BinaryContainerWriter_1_1`]'s @@ -390,6 +390,10 @@ impl<'value, 'top> StructWriter for BinaryStructWriter_1_1<'value, 'top> { } self.container_writer.end() } + + fn config(&self) -> ValueWriterConfig { + v1_1::Binary::default_value_writer_config() + } } pub struct BinaryEExpWriter_1_1<'value, 'top> { diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index 6e65e8ee..3a931f12 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -59,7 +59,9 @@ impl<'value, 'top> BinaryValueWriter_1_1<'value, 'top> { } pub fn with_inline_symbol_text(mut self) -> Self { - self.value_writer_config = self.value_writer_config.with_delimited_containers(); + self.value_writer_config = self + .value_writer_config + .with_symbol_value_encoding(SymbolValueEncoding::WriteAsInlineText); self } @@ -721,10 +723,6 @@ impl<'value, 'top> ValueWriter for BinaryValueWriter_1_1<'value, 'top> { type EExpWriter = BinaryEExpWriter_1_1<'value, 'top>; delegate_value_writer_to_self!(); - - fn 
config(&self) -> ValueWriterConfig { - self.config() - } } /// Takes a series of `TYPE => METHOD` pairs, generating a function for each that encodes an @@ -741,7 +739,7 @@ macro_rules! annotate_and_delegate_1_1 { let value_writer = $crate::lazy::encoder::binary::v1_1::value_writer::BinaryValueWriter_1_1::new( self.allocator, self.buffer, - self.config(), + self.value_writer_config, ); value_writer.$method(value)?; Ok(()) @@ -821,7 +819,7 @@ impl<'value, 'top> AnnotatableWriter for BinaryAnnotatedValueWriter_1_1<'value, self.allocator, self.buffer, annotations.into_annotations_vec(), - self.config(), + self.value_writer_config, )) } } @@ -868,10 +866,6 @@ impl<'value, 'top> ValueWriter for BinaryAnnotatedValueWriter_1_1<'value, 'top> } self.value_writer().eexp_writer(macro_id) } - - fn config(&self) -> ValueWriterConfig { - self.value_writer_config - } } impl<'value, 'top> BinaryAnnotatedValueWriter_1_1<'value, 'top> { @@ -889,7 +883,8 @@ impl<'value, 'top> BinaryAnnotatedValueWriter_1_1<'value, 'top> { } } pub(crate) fn value_writer(self) -> BinaryValueWriter_1_1<'value, 'top> { - let writer = BinaryValueWriter_1_1::new(self.allocator, self.buffer, self.config()); + let writer = + BinaryValueWriter_1_1::new(self.allocator, self.buffer, self.value_writer_config); writer } diff --git a/src/lazy/encoder/text/v1_0/value_writer.rs b/src/lazy/encoder/text/v1_0/value_writer.rs index 0b9a7c99..2d45d887 100644 --- a/src/lazy/encoder/text/v1_0/value_writer.rs +++ b/src/lazy/encoder/text/v1_0/value_writer.rs @@ -18,7 +18,9 @@ use crate::result::IonFailure; use crate::text::text_formatter::{FmtValueFormatter, IoValueFormatter}; use crate::text::whitespace_config::WhitespaceConfig; use crate::types::{ContainerType, ParentType}; -use crate::{Decimal, Int, IonResult, IonType, RawSymbolRef, Timestamp}; +use crate::{ + v1_0, Decimal, Encoding, Int, IonResult, IonType, RawSymbolRef, Timestamp, ValueWriterConfig, +}; pub struct TextValueWriter_1_0<'value, W: Write + 'value> { 
pub(crate) writer: &'value mut LazyRawTextWriter_1_0, @@ -448,6 +450,10 @@ impl<'value, W: Write> StructWriter for TextStructWriter_1_0<'value, W> { fn close(self) -> IonResult<()> { self.end() } + + fn config(&self) -> ValueWriterConfig { + v1_0::Text::default_value_writer_config() + } } impl<'value, W: Write + 'value> AnnotatableWriter for TextAnnotatedValueWriter_1_0<'value, W> { diff --git a/src/lazy/encoder/text/v1_1/value_writer.rs b/src/lazy/encoder/text/v1_1/value_writer.rs index 21a31b1f..566da220 100644 --- a/src/lazy/encoder/text/v1_1/value_writer.rs +++ b/src/lazy/encoder/text/v1_1/value_writer.rs @@ -12,7 +12,7 @@ use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_ref::AsRawSymbolRef; use crate::result::IonFailure; use crate::types::{ContainerType, ParentType}; -use crate::{Decimal, Int, IonResult, IonType, Timestamp}; +use crate::{v1_1, Decimal, Encoding, Int, IonResult, IonType, Timestamp, ValueWriterConfig}; use delegate::delegate; use std::io::Write; @@ -230,6 +230,10 @@ impl<'value, W: Write> StructWriter for TextStructWriter_1_1<'value, W> { fn close(self) -> IonResult<()> { self.writer_1_0.close() } + + fn config(&self) -> ValueWriterConfig { + v1_1::Text::default_value_writer_config() + } } pub struct TextEExpWriter_1_1<'value, W: Write> { diff --git a/src/lazy/encoder/value_writer.rs b/src/lazy/encoder/value_writer.rs index 9143bcf5..1f9618b2 100644 --- a/src/lazy/encoder/value_writer.rs +++ b/src/lazy/encoder/value_writer.rs @@ -99,10 +99,6 @@ pub trait ValueWriter: AnnotatableWriter + Sized { strukt.write_all(values)?; strukt.close() } - - fn config(&self) -> ValueWriterConfig { - ValueWriterConfig::default() - } } /// There are several implementations of `ValueWriter` that simply delegate calls to an expression. @@ -183,6 +179,7 @@ macro_rules! 
delegate_value_writer_to { self, macro_id: impl Into>, ) -> IonResult; + } } }; @@ -204,13 +201,19 @@ pub(crate) use delegate_value_writer_to_self; pub struct FieldWriter<'field, StructWriterType> { name: RawSymbolRef<'field>, struct_writer: &'field mut StructWriterType, + value_writer_config: ValueWriterConfig, } impl<'field, StructWriterType> FieldWriter<'field, StructWriterType> { - pub fn new(name: RawSymbolRef<'field>, struct_writer: &'field mut StructWriterType) -> Self { + pub(crate) fn new( + name: RawSymbolRef<'field>, + value_writer_config: ValueWriterConfig, + struct_writer: &'field mut StructWriterType, + ) -> Self { Self { name, struct_writer, + value_writer_config, } } } @@ -334,10 +337,11 @@ pub trait StructWriter: FieldEncoder + MakeValueWriter + Sized { } fn field_writer<'a>(&'a mut self, name: impl Into>) -> FieldWriter<'a, Self> { - FieldWriter::new(name.into(), self) + FieldWriter::new(name.into(), self.config(), self) } - fn close(self) -> IonResult<()>; + + fn config(&self) -> ValueWriterConfig; } /// Takes a series of `TYPE => METHOD` pairs, generating a function for each that calls the diff --git a/src/lazy/encoder/value_writer_config.rs b/src/lazy/encoder/value_writer_config.rs index b0db84fd..f5d30662 100644 --- a/src/lazy/encoder/value_writer_config.rs +++ b/src/lazy/encoder/value_writer_config.rs @@ -78,49 +78,64 @@ pub enum FieldNameEncoding { } impl ValueWriterConfig { - /// Constructs a default `ValueWriterConfig`. - pub fn new() -> Self { - ValueWriterConfig::default() + /// Constructs a `ValueWriterConfig` that writes all symbol tokens as inline text. 
+ pub const fn text() -> Self { + ValueWriterConfig { + container_encoding: ContainerEncoding::Delimited, + symbol_value_encoding: SymbolValueEncoding::WriteAsInlineText, + annotations_encoding: AnnotationsEncoding::WriteAsInlineText, + field_name_encoding: FieldNameEncoding::WriteAsInlineText, + } } - pub fn container_encoding(&self) -> ContainerEncoding { + /// Constructs a `ValueWriterConfig` that writes all symbol tokens as symbol IDs. + pub const fn binary() -> Self { + ValueWriterConfig { + container_encoding: ContainerEncoding::LengthPrefixed, + symbol_value_encoding: SymbolValueEncoding::WriteAsSymbolIds, + annotations_encoding: AnnotationsEncoding::WriteAsSymbolIds, + field_name_encoding: FieldNameEncoding::WriteAsSymbolIds, + } + } + + pub const fn container_encoding(&self) -> ContainerEncoding { self.container_encoding } - pub fn symbol_value_encoding(&self) -> SymbolValueEncoding { + pub const fn symbol_value_encoding(&self) -> SymbolValueEncoding { self.symbol_value_encoding } - pub fn field_name_encoding(&self) -> FieldNameEncoding { + pub const fn field_name_encoding(&self) -> FieldNameEncoding { self.field_name_encoding } - pub fn annotations_encoding(&self) -> AnnotationsEncoding { + pub const fn annotations_encoding(&self) -> AnnotationsEncoding { self.annotations_encoding } /// Returns `true` if this value writer will write nested containers with a delimited encoding. - pub fn has_delimited_containers(&self) -> bool { - self.container_encoding == ContainerEncoding::Delimited + pub const fn has_delimited_containers(&self) -> bool { + matches!(self.container_encoding, ContainerEncoding::Delimited) } /// Configures this value writer will write nested containers using a delimited encoding. If it /// is `false`, nested containers will be length-prefixed. 
- pub fn with_delimited_containers(mut self) -> Self { + pub const fn with_delimited_containers(mut self) -> Self { self.container_encoding = ContainerEncoding::Delimited; self } /// If `delimited_containers` is `true`, this value writer will write nested containers using /// a delimited encoding. If it is `false`, nested containers will be length-prefixed. - pub fn with_container_encoding(mut self, container_encoding: ContainerEncoding) -> Self { + pub const fn with_container_encoding(mut self, container_encoding: ContainerEncoding) -> Self { self.container_encoding = container_encoding; self } /// Configures this value writer to write symbol values and annotations with their UTF-8 text /// inline. - pub fn with_symbol_value_encoding( + pub const fn with_symbol_value_encoding( mut self, symbol_value_encoding: SymbolValueEncoding, ) -> Self { @@ -129,14 +144,20 @@ impl ValueWriterConfig { } /// Configures how this value writer will encode its annotations (if any). - pub fn with_annotations_encoding(mut self, annotations_encoding: AnnotationsEncoding) -> Self { + pub const fn with_annotations_encoding( + mut self, + annotations_encoding: AnnotationsEncoding, + ) -> Self { self.annotations_encoding = annotations_encoding; self } /// If this value writer is used to write a struct, the struct be configured to encode its /// field names according to the specified t`field_name_encoding`. 
- pub fn with_field_name_encoding(mut self, field_name_encoding: FieldNameEncoding) -> Self { + pub const fn with_field_name_encoding( + mut self, + field_name_encoding: FieldNameEncoding, + ) -> Self { self.field_name_encoding = field_name_encoding; self } diff --git a/src/lazy/encoder/writer.rs b/src/lazy/encoder/writer.rs index d4068eb4..3a64ecb2 100644 --- a/src/lazy/encoder/writer.rs +++ b/src/lazy/encoder/writer.rs @@ -8,14 +8,14 @@ use crate::lazy::encoder::annotation_seq::{AnnotationSeq, AnnotationsVec}; use crate::lazy::encoder::binary::v1_1::value_writer::BinaryValueWriter_1_1; use crate::lazy::encoder::value_writer::internal::{FieldEncoder, MakeValueWriter}; use crate::lazy::encoder::value_writer::{ - AnnotatableWriter, EExpWriter, SequenceWriter, StructWriter, ValueWriter, + AnnotatableWriter, EExpWriter, FieldWriter, SequenceWriter, StructWriter, ValueWriter, }; use crate::lazy::encoder::value_writer_config::{ AnnotationsEncoding, ContainerEncoding, FieldNameEncoding, SymbolValueEncoding, ValueWriterConfig, }; use crate::lazy::encoder::write_as_ion::WriteAsIon; -use crate::lazy::encoder::{LazyRawWriter, SymbolCreationPolicy}; +use crate::lazy::encoder::LazyRawWriter; use crate::lazy::encoding::{ BinaryEncoding_1_0, BinaryEncoding_1_1, Encoding, TextEncoding_1_0, TextEncoding_1_1, }; @@ -24,38 +24,33 @@ use crate::raw_symbol_ref::AsRawSymbolRef; use crate::result::IonFailure; use crate::write_config::WriteConfig; use crate::{ - Decimal, Element, ElementWriter, Int, IonResult, IonType, RawSymbolRef, Symbol, SymbolTable, - Timestamp, UInt, Value, + Decimal, Element, ElementWriter, Int, IonResult, IonType, MacroTable, RawSymbolRef, Symbol, + SymbolTable, Timestamp, UInt, Value, }; -pub(crate) struct WriteContext { +pub(crate) struct WriterContext { symbol_table: SymbolTable, + macro_table: MacroTable, num_pending_symbols: usize, - symbol_creation_policy: SymbolCreationPolicy, - supports_text_tokens: bool, } -impl WriteContext { - pub fn new( - 
symbol_table: SymbolTable, - symbol_creation_policy: SymbolCreationPolicy, - supports_text_tokens: bool, - ) -> Self { +impl WriterContext { + pub fn new(symbol_table: SymbolTable, macro_table: MacroTable) -> Self { Self { symbol_table, + macro_table, num_pending_symbols: 0, - symbol_creation_policy, - supports_text_tokens, } } } /// An Ion writer that maintains a symbol table and creates new entries as needed. pub struct Writer { - write_context: WriteContext, + context: WriterContext, data_writer: E::Writer>, directive_writer: E::Writer>, output: Output, + value_writer_config: ValueWriterConfig, } pub type TextWriter_1_0 = Writer; @@ -74,16 +69,14 @@ impl Writer { // TODO: LazyEncoder should define a method to construct a new symtab and/or macro table let ion_version = E::ion_version(); let symbol_table = SymbolTable::new(ion_version); - let encoding_context = WriteContext::new( - symbol_table, - E::DEFAULT_SYMBOL_CREATION_POLICY, - E::SUPPORTS_TEXT_TOKENS, - ); + let macro_table = MacroTable::new(); + let context = WriterContext::new(symbol_table, macro_table); let mut writer = Writer { - write_context: encoding_context, + context, data_writer, directive_writer, output, + value_writer_config: E::default_value_writer_config(), }; writer.flush()?; Ok(writer) @@ -106,9 +99,9 @@ impl Writer { /// Writes bytes of previously encoded values to the output stream. pub fn flush(&mut self) -> IonResult<()> { - if self.write_context.num_pending_symbols > 0 { + if self.context.num_pending_symbols > 0 { self.write_lst_append()?; - self.write_context.num_pending_symbols = 0; + self.context.num_pending_symbols = 0; } self.directive_writer.flush()?; @@ -131,12 +124,11 @@ impl Writer { /// Helper method to encode an LST append containing pending symbols. fn write_lst_append(&mut self) -> IonResult<()> { let Self { - write_context: encoding_context, + context, directive_writer, .. 
} = self; - let num_pending_symbols = encoding_context.num_pending_symbols; let mut lst = directive_writer .value_writer() .with_annotations(system_symbol_ids::ION_SYMBOL_TABLE)? @@ -147,9 +139,9 @@ impl Writer { let mut new_symbol_list = lst.field_writer(system_symbol_ids::SYMBOLS).list_writer()?; - let pending_symbols = encoding_context + let pending_symbols = context .symbol_table - .symbols_tail(num_pending_symbols) + .symbols_tail(context.num_pending_symbols) .iter() .map(Symbol::text); @@ -170,7 +162,8 @@ impl MakeValueWriter for Writer { ApplicationValueWriter { raw_value_writer, - encoding: &mut self.write_context, + encoding: &mut self.context, + value_writer_config: self.value_writer_config, } } } @@ -185,14 +178,20 @@ impl SequenceWriter for Writer { } pub struct ApplicationValueWriter<'a, V: ValueWriter> { - encoding: &'a mut WriteContext, + encoding: &'a mut WriterContext, raw_value_writer: V, + value_writer_config: ValueWriterConfig, } impl<'a, V: ValueWriter> ApplicationValueWriter<'a, V> { - pub(crate) fn new(encoding_context: &'a mut WriteContext, raw_value_writer: V) -> Self { + pub(crate) fn new( + encoding_context: &'a mut WriterContext, + value_writer_config: ValueWriterConfig, + raw_value_writer: V, + ) -> Self { Self { encoding: encoding_context, + value_writer_config, raw_value_writer, } } @@ -203,20 +202,16 @@ impl<'a, V: ValueWriter> ApplicationValueWriter<'a, V> { } impl<'a, 'value, 'top> ApplicationValueWriter<'a, BinaryValueWriter_1_1<'value, 'top>> { - pub fn config(&self) -> ValueWriterConfig { - self.raw_value_writer.config() - } - pub fn with_container_encoding(mut self, container_encoding: ContainerEncoding) -> Self { - self.raw_value_writer = self - .raw_value_writer + self.value_writer_config = self + .value_writer_config .with_container_encoding(container_encoding); self } pub fn with_annotations_encoding(mut self, annotations_encoding: AnnotationsEncoding) -> Self { - self.raw_value_writer = self - .raw_value_writer + 
self.value_writer_config = self + .value_writer_config .with_annotations_encoding(annotations_encoding); self } @@ -225,18 +220,11 @@ impl<'a, 'value, 'top> ApplicationValueWriter<'a, BinaryValueWriter_1_1<'value, mut self, symbol_value_encoding: SymbolValueEncoding, ) -> Self { - self.raw_value_writer = self - .raw_value_writer + self.value_writer_config = self + .value_writer_config .with_symbol_value_encoding(symbol_value_encoding); self } - - pub fn with_field_name_encoding(mut self, field_name_encoding: FieldNameEncoding) -> Self { - self.raw_value_writer = self - .raw_value_writer - .with_field_name_encoding(field_name_encoding); - self - } } impl<'value, V: ValueWriter> AnnotatableWriter for ApplicationValueWriter<'value, V> { @@ -250,7 +238,7 @@ impl<'value, V: ValueWriter> AnnotatableWriter for ApplicationValueWriter<'value Self: 'a, { let mut annotations = annotations.into_annotations_vec(); - match self.config().annotations_encoding() { + match self.value_writer_config.annotations_encoding() { AnnotationsEncoding::WriteAsSymbolIds => { // Intern all text so everything we write is a symbol ID self.map_all_annotations_to_symbol_ids(&mut annotations)? 
@@ -268,6 +256,7 @@ impl<'value, V: ValueWriter> AnnotatableWriter for ApplicationValueWriter<'value Ok(ApplicationValueWriter { encoding: self.encoding, raw_value_writer: self.raw_value_writer.with_annotations(annotations)?, + value_writer_config: self.value_writer_config, }) } } @@ -390,7 +379,6 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> { fn write_string(self, value: impl AsRef) -> IonResult<()>; fn write_clob(self, value: impl AsRef<[u8]>) -> IonResult<()>; fn write_blob(self, value: impl AsRef<[u8]>) -> IonResult<()>; - fn config(&self) -> ValueWriterConfig; } } @@ -398,10 +386,10 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> { use RawSymbolRef::*; use SymbolValueEncoding::*; - let config = self.config(); let Self { encoding, raw_value_writer, + value_writer_config, } = self; // Depending on the symbol value encoding config option, map the provided symbol reference @@ -417,7 +405,7 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> { SymbolId(symbol_id) } Text(text) => { - match config.symbol_value_encoding() { + match value_writer_config.symbol_value_encoding() { WriteAsSymbolIds => { // Map the text to a symbol ID. 
match encoding.symbol_table.sid_for(&text) { @@ -449,6 +437,7 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> { fn list_writer(self) -> IonResult { Ok(ApplicationListWriter::new( self.encoding, + self.value_writer_config, self.raw_value_writer.list_writer()?, )) } @@ -456,13 +445,16 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> { fn sexp_writer(self) -> IonResult { Ok(ApplicationSExpWriter::new( self.encoding, + self.value_writer_config, self.raw_value_writer.sexp_writer()?, )) } fn struct_writer(self) -> IonResult { + let config = self.value_writer_config; Ok(ApplicationStructWriter::new( self.encoding, + config, self.raw_value_writer.struct_writer()?, )) } @@ -470,26 +462,37 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> { fn eexp_writer<'a>(self, macro_id: impl Into>) -> IonResult { Ok(ApplicationEExpWriter::new( self.encoding, + self.value_writer_config, self.raw_value_writer.eexp_writer(macro_id)?, )) } } pub struct ApplicationStructWriter<'value, V: ValueWriter> { - encoding: &'value mut WriteContext, + encoding: &'value mut WriterContext, raw_struct_writer: V::StructWriter, + value_writer_config: ValueWriterConfig, } impl<'value, V: ValueWriter> ApplicationStructWriter<'value, V> { pub(crate) fn new( - encoding_context: &'value mut WriteContext, + encoding_context: &'value mut WriterContext, + config: ValueWriterConfig, raw_struct_writer: V::StructWriter, ) -> Self { Self { encoding: encoding_context, raw_struct_writer, + value_writer_config: config, } } + + pub fn with_field_name_encoding(mut self, field_name_encoding: FieldNameEncoding) -> Self { + self.value_writer_config = self + .value_writer_config + .with_field_name_encoding(field_name_encoding); + self + } } impl<'value, V: ValueWriter> MakeValueWriter for ApplicationStructWriter<'value, V> { @@ -498,68 +501,89 @@ impl<'value, V: ValueWriter> MakeValueWriter for ApplicationStructWriter<'value, 
Self: 'a; fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { - ApplicationValueWriter::new(self.encoding, self.raw_struct_writer.make_value_writer()) + ApplicationValueWriter::new( + self.encoding, + self.value_writer_config, + self.raw_struct_writer.make_value_writer(), + ) } } impl<'value, V: ValueWriter> FieldEncoder for ApplicationStructWriter<'value, V> { fn encode_field_name(&mut self, name: impl AsRawSymbolRef) -> IonResult<()> { - // If it's a symbol ID, do a bounds check and then write it. - // Otherwise, get its associated text. let text = match name.as_raw_symbol_token_ref() { + // If the user passes in a symbol ID, we range check it and write it as-is no matter what. + // In the unusual circumstance that the user has a SID and wants to write text, they can + // resolve the SID in the symbol table before calling this method. RawSymbolRef::SymbolId(symbol_id) => { if !self.encoding.symbol_table.sid_is_valid(symbol_id) { return cold_path!(IonResult::encoding_error(format!( "symbol ID ${symbol_id} is not in the symbol table" ))); } + // The symbol ID is in range; write it as-is. return self.raw_struct_writer.encode_field_name(symbol_id); } RawSymbolRef::Text(text) => text, }; - // If the writer can write it as inline text, do so. - if self.encoding.supports_text_tokens - && self.encoding.symbol_creation_policy == SymbolCreationPolicy::WriteProvidedToken - { + // From here on, we're dealing with text. + + // If the struct writer is configured to write field names as text, do that. + if self.value_writer_config.field_name_encoding() == FieldNameEncoding::WriteAsInlineText { return self.raw_struct_writer.encode_field_name(text); } // Otherwise, see if the symbol is already in the symbol table. - let symbol_id = match self.encoding.symbol_table.sid_for(&text) { + let token: RawSymbolRef = match self.encoding.symbol_table.sid_for(&text) { // If so, use the existing ID. 
- Some(sid) => sid, - // If not, add it to the symbol table and make a note to add it to the LST on the next - // call to `flush()`. Use the new ID. - None => { + Some(sid) => sid.into(), + // If it's not but the struct writer is configured to intern new text, add it to the + // symbol table. + None if self.value_writer_config.field_name_encoding() + == FieldNameEncoding::WriteAsSymbolIds => + { self.encoding.num_pending_symbols += 1; - self.encoding.symbol_table.add_symbol_for_text(text) + self.encoding.symbol_table.add_symbol_for_text(text).into() } + // Otherwise, we'll write the text as-is. + None => text.into(), }; - // Finally, write out the SID. - self.raw_struct_writer.encode_field_name(symbol_id) + // Finally, encode the field name using the selected token representation + self.raw_struct_writer.encode_field_name(token) } } impl<'value, V: ValueWriter> StructWriter for ApplicationStructWriter<'value, V> { + fn field_writer<'a>(&'a mut self, name: impl Into>) -> FieldWriter<'a, Self> { + FieldWriter::new(name.into(), self.value_writer_config, self) + } + fn close(self) -> IonResult<()> { self.raw_struct_writer.close() } + + fn config(&self) -> ValueWriterConfig { + self.value_writer_config + } } pub struct ApplicationListWriter<'value, V: ValueWriter> { - encoding: &'value mut WriteContext, + encoding: &'value mut WriterContext, raw_list_writer: V::ListWriter, + value_writer_config: ValueWriterConfig, } impl<'value, V: ValueWriter> ApplicationListWriter<'value, V> { pub(crate) fn new( - encoding_context: &'value mut WriteContext, + encoding_context: &'value mut WriterContext, + value_writer_config: ValueWriterConfig, raw_list_writer: V::ListWriter, ) -> Self { Self { encoding: encoding_context, + value_writer_config, raw_list_writer, } } @@ -571,7 +595,11 @@ impl<'value, V: ValueWriter> MakeValueWriter for ApplicationListWriter<'value, V Self: 'a; fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { - ApplicationValueWriter::new(self.encoding, 
self.raw_list_writer.make_value_writer()) + ApplicationValueWriter::new( + self.encoding, + self.value_writer_config, + self.raw_list_writer.make_value_writer(), + ) } } @@ -584,14 +612,20 @@ impl<'value, V: ValueWriter> SequenceWriter for ApplicationListWriter<'value, V> } pub struct ApplicationSExpWriter<'value, V: ValueWriter> { - encoding: &'value mut WriteContext, + encoding: &'value mut WriterContext, raw_sexp_writer: V::SExpWriter, + value_writer_config: ValueWriterConfig, } impl<'value, V: ValueWriter> ApplicationSExpWriter<'value, V> { - pub(crate) fn new(encoding: &'value mut WriteContext, raw_sexp_writer: V::SExpWriter) -> Self { + pub(crate) fn new( + encoding: &'value mut WriterContext, + value_writer_config: ValueWriterConfig, + raw_sexp_writer: V::SExpWriter, + ) -> Self { Self { encoding, + value_writer_config, raw_sexp_writer, } } @@ -602,7 +636,11 @@ impl<'value, V: ValueWriter> MakeValueWriter for ApplicationSExpWriter<'value, V ApplicationValueWriter<'a, ::ValueWriter<'a>> where Self: 'a; fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { - ApplicationValueWriter::new(self.encoding, self.raw_sexp_writer.make_value_writer()) + ApplicationValueWriter::new( + self.encoding, + self.value_writer_config, + self.raw_sexp_writer.make_value_writer(), + ) } } @@ -615,14 +653,20 @@ impl<'value, V: ValueWriter> SequenceWriter for ApplicationSExpWriter<'value, V> } pub struct ApplicationEExpWriter<'value, V: ValueWriter> { - encoding: &'value mut WriteContext, + encoding: &'value mut WriterContext, raw_eexp_writer: V::EExpWriter, + value_writer_config: ValueWriterConfig, } impl<'value, V: ValueWriter> ApplicationEExpWriter<'value, V> { - pub(crate) fn new(encoding: &'value mut WriteContext, raw_eexp_writer: V::EExpWriter) -> Self { + pub(crate) fn new( + encoding: &'value mut WriterContext, + value_writer_config: ValueWriterConfig, + raw_eexp_writer: V::EExpWriter, + ) -> Self { Self { encoding, + value_writer_config, raw_eexp_writer, } } @@ -640,7 
+684,11 @@ impl<'value, V: ValueWriter> MakeValueWriter for ApplicationEExpWriter<'value, V type ValueWriter<'a> = ApplicationValueWriter<'a, <::EExpWriter as MakeValueWriter>::ValueWriter<'a>> where Self: 'a; fn make_value_writer(&mut self) -> Self::ValueWriter<'_> { - ApplicationValueWriter::new(self.encoding, self.raw_eexp_writer.make_value_writer()) + ApplicationValueWriter::new( + self.encoding, + self.value_writer_config, + self.raw_eexp_writer.make_value_writer(), + ) } } @@ -669,8 +717,8 @@ mod tests { use crate::lazy::encoder::value_writer_config::{AnnotationsEncoding, SymbolValueEncoding}; use crate::raw_symbol_ref::AsRawSymbolRef; use crate::{ - v1_1, HasSpan, IonResult, LazyRawValue, RawSymbolRef, SequenceWriter, SystemReader, - ValueWriter, Writer, + v1_1, FieldNameEncoding, HasSpan, IonResult, LazyRawValue, RawSymbolRef, SequenceWriter, + StructWriter, SystemReader, ValueWriter, Writer, }; fn symbol_value_encoding_test( @@ -692,7 +740,7 @@ mod tests { let actual_bytes = raw_value.span().bytes(); assert_eq!( actual_bytes, *expected_bytes, - "{:02X?} != {:02X?}", + "actual {:02X?} != expected {:02X?}", actual_bytes, expected_bytes ); println!( @@ -846,4 +894,86 @@ mod tests { ], ) } + + /// Writes a struct with all of the provided field names using the requested field name encoding. + /// For simplicity, the value for each field is the integer 0. + fn struct_field_encoding_test( + encoding: FieldNameEncoding, + field_names_and_encodings: &[(RawSymbolRef, &[u8])], + ) -> IonResult<()> { + // Configure a struct writer that uses the requested field name encoding + let mut writer = Writer::new(v1_1::Binary, Vec::new())?; + let mut struct_writer = writer + .value_writer() + .struct_writer()? 
+ .with_field_name_encoding(encoding); + + for (name, _) in field_names_and_encodings { + struct_writer.write(name, /* same value for every field*/ 0)?; + } + struct_writer.close()?; + let bytes = writer.close()?; + + let mut reader = SystemReader::new(v1_1::Binary, bytes.as_slice()); + let struct_ = reader.expect_next_value()?.read()?.expect_struct()?; + for (field, (_name, expected_encoding)) in + struct_.iter().zip(field_names_and_encodings.iter()) + { + let raw_name = field?.get_raw_name().unwrap(); + let raw_name_encoding = raw_name.span().bytes(); + assert_eq!( + raw_name_encoding, *expected_encoding, + "actual {:02X?}\n!=\nexpected {:02X?}", + raw_name_encoding, *expected_encoding + ); + } + + Ok(()) + } + + #[test] + fn intern_all_field_names() -> IonResult<()> { + struct_field_encoding_test( + FieldNameEncoding::WriteAsSymbolIds, + &[ + // New symbols + (RawSymbolRef::Text("foo"), &[0x15]), // FlexUInt SID $10, + (RawSymbolRef::Text("bar"), &[0x17]), // FlexUInt SID $11, + (RawSymbolRef::Text("baz"), &[0x19]), // FlexUInt SID $12, + // Symbols that are already in the symbol table + (RawSymbolRef::Text("name"), &[0x09]), // FlexUInt SID $4, + (RawSymbolRef::Text("foo"), &[0x15]), // FlexUInt SID $10, + ], + ) + } + + #[test] + fn write_all_field_names_as_text() -> IonResult<()> { + struct_field_encoding_test( + FieldNameEncoding::WriteAsInlineText, + &[ + // New symbols + (RawSymbolRef::Text("foo"), &[0xFB, 0x66, 0x6F, 0x6F]), // FlexSym -3, "foo" + (RawSymbolRef::Text("bar"), &[0xFB, 0x62, 0x61, 0x72]), // FlexSym -3, "bar" + (RawSymbolRef::Text("baz"), &[0xFB, 0x62, 0x61, 0x7A]), // FlexSym -3, "baz" + // Symbols that are already in the symbol table are still written as text + (RawSymbolRef::Text("name"), &[0xF9, 0x6E, 0x61, 0x6D, 0x65]), // FlexSym -4, "name" + ], + ) + } + + #[test] + fn write_new_field_names_as_text() -> IonResult<()> { + struct_field_encoding_test( + FieldNameEncoding::WriteNewSymbolsAsInlineText, + &[ + // New symbols + 
(RawSymbolRef::Text("foo"), &[0xFB, 0x66, 0x6F, 0x6F]), // FlexSym -3, "foo" + (RawSymbolRef::Text("bar"), &[0xFB, 0x62, 0x61, 0x72]), // FlexSym -3, "bar" + (RawSymbolRef::Text("baz"), &[0xFB, 0x62, 0x61, 0x7A]), // FlexSym -3, "baz" + // Symbols that are already in the symbol table are written as SIDs + (RawSymbolRef::Text("name"), &[0x09]), // FlexSym 4, SID $4, + ], + ) + } } diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index 9744d62b..7411e20e 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -35,7 +35,10 @@ use crate::lazy::text::value::{ }; use crate::lazy::binary::raw::v1_1::e_expression::BinaryEExpression_1_1; -use crate::{IonResult, TextFormat, WriteConfig}; +use crate::{ + AnnotationsEncoding, ContainerEncoding, FieldNameEncoding, IonResult, SymbolValueEncoding, + TextFormat, ValueWriterConfig, WriteConfig, +}; /// Marker trait for types that represent an Ion encoding. pub trait Encoding: Encoder + Decoder { @@ -79,6 +82,7 @@ pub trait Encoding: Encoder + Decoder { } fn default_write_config() -> WriteConfig; + fn default_value_writer_config() -> ValueWriterConfig; } // Similar to a simple `From` implementation, but can be defined for both String and Vec because @@ -150,6 +154,14 @@ impl Encoding for BinaryEncoding_1_0 { fn default_write_config() -> WriteConfig { WriteConfig::::new() } + + fn default_value_writer_config() -> ValueWriterConfig { + ValueWriterConfig::binary() + .with_field_name_encoding(FieldNameEncoding::WriteAsSymbolIds) + .with_annotations_encoding(AnnotationsEncoding::WriteAsSymbolIds) + .with_container_encoding(ContainerEncoding::LengthPrefixed) + .with_symbol_value_encoding(SymbolValueEncoding::WriteAsSymbolIds) + } } impl Encoding for BinaryEncoding_1_1 { type Output = Vec; @@ -165,9 +177,15 @@ impl Encoding for BinaryEncoding_1_1 { fn name() -> &'static str { "binary Ion v1.1" } + fn default_write_config() -> WriteConfig { WriteConfig::::new() } + + fn default_value_writer_config() -> ValueWriterConfig { + 
// By default, use the same settings as binary 1.0 + BinaryEncoding_1_0::default_value_writer_config() + } } impl Encoding for TextEncoding_1_0 { type Output = String; @@ -186,6 +204,13 @@ impl Encoding for TextEncoding_1_0 { fn default_write_config() -> WriteConfig { WriteConfig::::new(::default()) } + fn default_value_writer_config() -> ValueWriterConfig { + ValueWriterConfig::text() + .with_field_name_encoding(FieldNameEncoding::WriteAsInlineText) + .with_annotations_encoding(AnnotationsEncoding::WriteAsInlineText) + .with_container_encoding(ContainerEncoding::Delimited) + .with_symbol_value_encoding(SymbolValueEncoding::WriteAsInlineText) + } } impl Encoding for TextEncoding_1_1 { type Output = String; @@ -204,6 +229,11 @@ impl Encoding for TextEncoding_1_1 { fn default_write_config() -> WriteConfig { WriteConfig::::new(::default()) } + + fn default_value_writer_config() -> ValueWriterConfig { + // By default, use the same settings as text 1.0 + TextEncoding_1_0::default_value_writer_config() + } } /// Marker trait for binary encodings of any version. diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index cbde1003..220cd0a0 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -33,7 +33,6 @@ //! that are ignored by the reader do not incur the cost of symbol table resolution. use std::cell::{Cell, UnsafeCell}; -use std::collections::HashMap; use std::fmt::{Debug, Formatter}; use std::ops::{Deref, Range}; @@ -48,7 +47,6 @@ use crate::lazy::decoder::{Decoder, LazyRawValue}; use crate::lazy::encoding::RawValueLiteral; use crate::lazy::expanded::compiler::TemplateCompiler; use crate::lazy::expanded::e_expression::EExpression; -use crate::lazy::expanded::encoding_module::EncodingModule; use crate::lazy::expanded::macro_evaluator::{ MacroEvaluator, MacroExpansion, MacroExpr, RawEExpression, }; @@ -94,7 +92,6 @@ pub mod template; // would need to be proved out first. 
#[derive(Debug)] pub struct EncodingContext { - pub(crate) modules: HashMap, pub(crate) macro_table: MacroTable, pub(crate) symbol_table: SymbolTable, pub(crate) allocator: BumpAllocator, @@ -107,7 +104,6 @@ impl EncodingContext { allocator: BumpAllocator, ) -> Self { Self { - modules: HashMap::new(), macro_table, symbol_table, allocator, diff --git a/src/lazy/never.rs b/src/lazy/never.rs index 68c86805..80aafe2b 100644 --- a/src/lazy/never.rs +++ b/src/lazy/never.rs @@ -19,7 +19,7 @@ use crate::lazy::span::Span; use crate::lazy::text::raw::v1_1::arg_group::EExpArg; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_ref::AsRawSymbolRef; -use crate::{Decimal, Int, IonResult, IonType, Timestamp}; +use crate::{Decimal, Int, IonResult, IonType, Timestamp, ValueWriterConfig}; /// An uninhabited type that signals to the compiler that related code paths are not reachable. #[derive(Debug, Copy, Clone)] @@ -57,6 +57,10 @@ impl StructWriter for Never { fn close(self) -> IonResult<()> { unreachable!("StructWriter::end in Never") } + + fn config(&self) -> ValueWriterConfig { + unreachable!("::config") + } } impl MakeValueWriter for Never { diff --git a/src/lazy/struct.rs b/src/lazy/struct.rs index d106e18f..77c33b6a 100644 --- a/src/lazy/struct.rs +++ b/src/lazy/struct.rs @@ -299,8 +299,15 @@ impl<'top, D: Decoder> LazyField<'top, D> { } } + // This is a `pub` version of `get_raw_name` that requires explicit opt-in. #[cfg(feature = "experimental-tooling-apis")] pub fn raw_name(&self) -> Option> { + self.get_raw_name() + } + + /// Like `raw_name`, but always accessible internally. + #[inline] + pub(crate) fn get_raw_name(&self) -> Option> { if let crate::LazyExpandedFieldName::RawName(_context, raw_name) = self.expanded_field.name() {