diff --git a/src/element/mod.rs b/src/element/mod.rs index dbc34f3a..4ecb6602 100644 --- a/src/element/mod.rs +++ b/src/element/mod.rs @@ -26,7 +26,7 @@ use std::cmp::Ordering; use std::fmt::{Display, Formatter}; use std::io; -use crate::{ion_data, Decimal, Int, IonResult, IonType, Str, Symbol, Timestamp}; +use crate::{ion_data, Decimal, Int, IonResult, IonType, Str, Symbol, SymbolRef, Timestamp}; use crate::{Blob, Bytes, Clob, List, SExp, Struct}; // Re-export the Value variant types and traits so they can be accessed directly from this module. use crate::element::builders::{SequenceBuilder, StructBuilder}; @@ -251,6 +251,12 @@ impl From for Value { } } +impl From> for Value { + fn from(sym_val: SymbolRef<'_>) -> Self { + Value::Symbol(sym_val.to_owned()) + } +} + impl From<&[u8]> for Value { fn from(value: &[u8]) -> Self { Value::Blob(value.into()) diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 8274373f..24d5489c 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -726,7 +726,6 @@ pub struct LazyRawAnyValue<'top> { impl<'top> LazyRawAnyValue<'top> { /// Returns an enum indicating the encoding that backs this lazy value. - #[cfg(feature = "experimental-tooling-apis")] pub fn kind(&self) -> LazyRawValueKind<'top> { self.encoding } diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index 12149a66..bf4ce14c 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -61,7 +61,7 @@ impl BinaryValueWriter_1_1<'_, '_> { pub fn with_inline_symbol_text(mut self) -> Self { self.value_writer_config = self .value_writer_config - .with_symbol_value_encoding(SymbolValueEncoding::WriteAsInlineText); + .with_symbol_value_encoding(SymbolValueEncoding::InlineText); self } diff --git a/src/lazy/encoder/value_writer.rs b/src/lazy/encoder/value_writer.rs index f27aae65..bd6713c1 100644 --- a/src/lazy/encoder/value_writer.rs +++ b/src/lazy/encoder/value_writer.rs @@ -481,3 +481,37 @@ pub trait SequenceWriter: MakeValueWriter { Ok(self) } } + +#[cfg(all(test, feature = "experimental-reader-writer"))] +mod tests { + use crate::symbol_ref::AsSymbolRef; + use crate::{ion_seq, v1_0, Element, IntoAnnotatedElement, SequenceWriter, Writer}; + use crate::{AnnotatableWriter, IonResult, ValueWriter}; + #[test] + fn save_and_reuse_symbol_id() -> IonResult<()> { + let mut writer = Writer::new(v1_0::Binary, vec![])?; + let name_symbol = writer + .value_writer() + .symbol_table() + .sid_for("name") + .unwrap(); + writer + // Write the symbol twice using its ID + .write_symbol(name_symbol)? + .write_symbol(name_symbol)? + // Use the ID again as an annotation... + .value_writer() + .with_annotations(name_symbol)? + // ...when writing the symbol once more. + .write_symbol(name_symbol)?; + let bytes = writer.close()?; + let actual = Element::read_all(&bytes)?; + let expected = ion_seq!( + "name".as_symbol_ref() + "name".as_symbol_ref() + "name".as_symbol_ref().with_annotations(["name"]) + ); + assert_eq!(actual, expected); + Ok(()) + } +} diff --git a/src/lazy/encoder/value_writer_config.rs b/src/lazy/encoder/value_writer_config.rs index f5d30662..fce9223c 100644 --- a/src/lazy/encoder/value_writer_config.rs +++ b/src/lazy/encoder/value_writer_config.rs @@ -38,13 +38,13 @@ pub enum ContainerEncoding { pub enum SymbolValueEncoding { /// Add all symbol values to the symbol table and encode them as symbol IDs. #[default] - WriteAsSymbolIds, + SymbolIds, /// Do not add symbol values to the symbol table; write their text inline. /// Symbol values specified as symbol IDs will not be mapped to text. - WriteAsInlineText, + InlineText, /// If a symbol value is already in the symbol table, encode it as a symbol ID. /// If it is not already in the symbol table, encode its text inline. - WriteNewSymbolsAsInlineText, + NewSymbolsAsInlineText, } /// Configuration options for encoding an annotations sequence. @@ -53,28 +53,29 @@ pub enum SymbolValueEncoding { pub enum AnnotationsEncoding { /// Add all annotations to the symbol table and encode them as symbol IDs. #[default] - WriteAsSymbolIds, + SymbolIds, /// Do not add annotations to the symbol table; write their text inline. /// Annotations specified as symbol IDs will not be mapped to text. - WriteAsInlineText, + InlineText, /// If an annotation is already in the symbol table, encode it as a symbol ID. /// If it is not already in the symbol table, encode its text inline. - WriteNewSymbolsAsInlineText, + NewSymbolsAsInlineText, } /// Configuration options for encoding a struct field name. #[derive(Copy, Clone, PartialEq, Eq, Debug, Default)] #[non_exhaustive] +#[allow(clippy::enum_variant_names)] pub enum FieldNameEncoding { /// Add all field names to the symbol table and encode them as symbol IDs. #[default] - WriteAsSymbolIds, + SymbolIds, /// Do not add field names to the symbol table; write their text inline. /// Field names specified as symbol IDs will not be mapped to text. - WriteAsInlineText, + InlineText, /// If a field name is already in the symbol table, encode it as a symbol ID. /// If it is not already in the symbol table, encode its text inline. - WriteNewSymbolsAsInlineText, + NewSymbolsAsInlineText, } impl ValueWriterConfig { @@ -82,9 +83,9 @@ impl ValueWriterConfig { pub const fn text() -> Self { ValueWriterConfig { container_encoding: ContainerEncoding::Delimited, - symbol_value_encoding: SymbolValueEncoding::WriteAsInlineText, - annotations_encoding: AnnotationsEncoding::WriteAsInlineText, - field_name_encoding: FieldNameEncoding::WriteAsInlineText, + symbol_value_encoding: SymbolValueEncoding::InlineText, + annotations_encoding: AnnotationsEncoding::InlineText, + field_name_encoding: FieldNameEncoding::InlineText, } } @@ -92,9 +93,9 @@ impl ValueWriterConfig { pub const fn binary() -> Self { ValueWriterConfig { container_encoding: ContainerEncoding::LengthPrefixed, - symbol_value_encoding: SymbolValueEncoding::WriteAsSymbolIds, - annotations_encoding: AnnotationsEncoding::WriteAsSymbolIds, - field_name_encoding: FieldNameEncoding::WriteAsSymbolIds, + symbol_value_encoding: SymbolValueEncoding::SymbolIds, + annotations_encoding: AnnotationsEncoding::SymbolIds, + field_name_encoding: FieldNameEncoding::SymbolIds, } } diff --git a/src/lazy/encoder/writer.rs b/src/lazy/encoder/writer.rs index 6f945135..f74ceb6c 100644 --- a/src/lazy/encoder/writer.rs +++ b/src/lazy/encoder/writer.rs @@ -125,6 +125,18 @@ impl Writer { Ok(self.output) } + #[cfg(feature = "experimental-reader-writer")] + #[inline] + pub fn symbol_table(&self) -> &SymbolTable { + &self.context.symbol_table + } + + #[cfg(not(feature = "experimental-reader-writer"))] + #[inline] + pub(crate) fn symbol_table(&self) -> &SymbolTable { + &self.context.symbol_table + } + /// Helper method to encode an LST append containing pending symbols. fn write_lst_append(&mut self) -> IonResult<()> { let Self { @@ -235,9 +247,21 @@ impl<'a, V: ValueWriter> ApplicationValueWriter<'a, V> { } } - fn symbol_table(&mut self) -> &mut SymbolTable { + fn symbol_table_mut(&mut self) -> &mut SymbolTable { &mut self.encoding.symbol_table } + + #[cfg(feature = "experimental-reader-writer")] + #[inline] + pub fn symbol_table(&self) -> &SymbolTable { + &self.encoding.symbol_table + } + + #[cfg(not(feature = "experimental-reader-writer"))] + #[inline] + pub(crate) fn symbol_table(&self) -> &SymbolTable { + &self.encoding.symbol_table + } } impl ApplicationValueWriter<'_, BinaryValueWriter_1_1<'_, '_>> { @@ -281,15 +305,15 @@ impl AnnotatableWriter for ApplicationValueWriter<'_, V> { { let mut annotations = annotations.into_annotations_vec(); match self.value_writer_config.annotations_encoding() { - AnnotationsEncoding::WriteAsSymbolIds => { + AnnotationsEncoding::SymbolIds => { // Intern all text so everything we write is a symbol ID self.intern_all_annotations(&mut annotations)? } - AnnotationsEncoding::WriteAsInlineText => { + AnnotationsEncoding::InlineText => { // Validate the symbol IDs, write the text as-is self.validate_all_symbol_ids(&mut annotations)? } - AnnotationsEncoding::WriteNewSymbolsAsInlineText => { + AnnotationsEncoding::NewSymbolsAsInlineText => { // Map all known strings to symbol IDs, leave new text as is. self.map_known_symbols_to_symbol_ids(&mut annotations)? } @@ -326,7 +350,7 @@ impl ApplicationValueWriter<'_, V> { } // The token is text... RawSymbolRef::Text(text) => { - let sid = match self.symbol_table().sid_for(&text) { + let sid = match self.symbol_table().sid_for(text) { Some(sid) => { //...that was already in the symbol table. sid @@ -334,7 +358,7 @@ impl ApplicationValueWriter<'_, V> { None => { // ...that we need to add to the symbol table. self.encoding.num_pending_symbols += 1; - self.symbol_table().add_symbol_for_text(text) + self.symbol_table_mut().add_symbol_for_text(text) } }; *annotation = RawSymbolRef::SymbolId(sid); @@ -389,7 +413,7 @@ impl ApplicationValueWriter<'_, V> { } // The token is text... RawSymbolRef::Text(text) => { - match self.symbol_table().sid_for(&text) { + match self.symbol_table_mut().sid_for(text) { Some(sid) => { //...that was already in the symbol table. *annotation = RawSymbolRef::SymbolId(sid); @@ -452,9 +476,9 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> { SystemSymbol_1_1(symbol) => SystemSymbol_1_1(symbol), Text(text) => { match value_writer_config.symbol_value_encoding() { - WriteAsSymbolIds => { + SymbolIds => { // Map the text to a symbol ID. - match encoding.symbol_table.sid_for(&text) { + match encoding.symbol_table.sid_for(text) { // If it's already in the symbol table, use that SID. Some(symbol_id) => SymbolId(symbol_id), // Otherwise, add it to the symbol table. @@ -464,15 +488,15 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> { } } } - WriteNewSymbolsAsInlineText => { + NewSymbolsAsInlineText => { // If the text is in the symbol table, use the symbol ID. Otherwise, use the text itself. - match encoding.symbol_table.sid_for(&text) { + match encoding.symbol_table.sid_for(text) { Some(symbol_id) => SymbolId(symbol_id), None => Text(text), } } // We have text and we want to write text. Nothing to do. - WriteAsInlineText => Text(text), + InlineText => Text(text), } } }; @@ -585,18 +609,18 @@ impl FieldEncoder for ApplicationStructWriter<'_, V> { // From here on, we're dealing with text. // If the struct writer is configured to write field names as text, do that. - if self.value_writer_config.field_name_encoding() == FieldNameEncoding::WriteAsInlineText { + if self.value_writer_config.field_name_encoding() == FieldNameEncoding::InlineText { return self.raw_struct_writer.encode_field_name(text); } // Otherwise, see if the symbol is already in the symbol table. - let token: RawSymbolRef<'_> = match self.encoding.symbol_table.sid_for(&text) { + let token: RawSymbolRef<'_> = match self.encoding.symbol_table.sid_for(text) { // If so, use the existing ID. Some(sid) => sid.into(), // If it's not but the struct writer is configured to intern new text, add it to the // symbol table. None if self.value_writer_config.field_name_encoding() - == FieldNameEncoding::WriteAsSymbolIds => + == FieldNameEncoding::SymbolIds => { self.encoding.num_pending_symbols += 1; self.encoding.symbol_table.add_symbol_for_text(text).into() @@ -828,7 +852,7 @@ mod tests { fn intern_new_symbol_values() -> IonResult<()> { use RawSymbolRef::*; symbol_value_encoding_test( - SymbolValueEncoding::WriteAsSymbolIds, + SymbolValueEncoding::SymbolIds, [ (Text("$ion_symbol_table"), &[0xE1, 0x03]), (Text("name"), &[0xE1, 0x04]), @@ -842,7 +866,7 @@ mod tests { fn do_not_intern_new_symbol_values() -> IonResult<()> { use RawSymbolRef::*; symbol_value_encoding_test( - SymbolValueEncoding::WriteNewSymbolsAsInlineText, + SymbolValueEncoding::NewSymbolsAsInlineText, [ // Known text symbols are written as SIDs (Text("$ion_symbol_table"), &[0xE1, 0x03]), @@ -860,7 +884,7 @@ mod tests { fn encode_all_text_as_is() -> IonResult<()> { use RawSymbolRef::*; symbol_value_encoding_test( - SymbolValueEncoding::WriteAsInlineText, + SymbolValueEncoding::InlineText, [ // Known text symbols are written as inline text (Text("name"), &[0xA4, 0x6E, 0x61, 0x6D, 0x65]), @@ -903,7 +927,7 @@ mod tests { fn intern_new_annotations() -> IonResult<()> { use RawSymbolRef::*; annotations_sequence_encoding_test( - AnnotationsEncoding::WriteAsSymbolIds, + AnnotationsEncoding::SymbolIds, &[ Text("$ion_symbol_table"), Text("name"), @@ -925,7 +949,7 @@ mod tests { fn write_new_annotations_as_text() -> IonResult<()> { use RawSymbolRef::*; annotations_sequence_encoding_test( - AnnotationsEncoding::WriteNewSymbolsAsInlineText, + AnnotationsEncoding::NewSymbolsAsInlineText, &[ Text("$ion_symbol_table"), Text("name"), @@ -950,7 +974,7 @@ mod tests { fn write_text_annotations_as_is() -> IonResult<()> { use RawSymbolRef::*; annotations_sequence_encoding_test( - AnnotationsEncoding::WriteAsInlineText, + AnnotationsEncoding::InlineText, &[Text("name"), SymbolId(6), Text("foo")], &[ 0xE9, // Opcode: FlexUInt follows with byte length of sequence @@ -1007,7 +1031,7 @@ mod tests { #[test] fn intern_all_field_names() -> IonResult<()> { struct_field_encoding_test( - FieldNameEncoding::WriteAsSymbolIds, + FieldNameEncoding::SymbolIds, &[ // New symbols (RawSymbolRef::Text("foo"), &[0x81]), // FlexUInt SID $64, @@ -1023,7 +1047,7 @@ mod tests { #[test] fn write_all_field_names_as_text() -> IonResult<()> { struct_field_encoding_test( - FieldNameEncoding::WriteAsInlineText, + FieldNameEncoding::InlineText, &[ // New symbols (RawSymbolRef::Text("foo"), &[0xFB, 0x66, 0x6F, 0x6F]), // FlexSym -3, "foo" @@ -1038,7 +1062,7 @@ mod tests { #[test] fn write_new_field_names_as_text() -> IonResult<()> { struct_field_encoding_test( - FieldNameEncoding::WriteNewSymbolsAsInlineText, + FieldNameEncoding::NewSymbolsAsInlineText, &[ // New symbols (RawSymbolRef::Text("foo"), &[0xFB, 0x66, 0x6F, 0x6F]), // FlexSym -3, "foo" diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index de2a8e41..e61a5160 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -157,10 +157,10 @@ impl Encoding for BinaryEncoding_1_0 { fn default_value_writer_config() -> ValueWriterConfig { ValueWriterConfig::binary() - .with_field_name_encoding(FieldNameEncoding::WriteAsSymbolIds) - .with_annotations_encoding(AnnotationsEncoding::WriteAsSymbolIds) + .with_field_name_encoding(FieldNameEncoding::SymbolIds) + .with_annotations_encoding(AnnotationsEncoding::SymbolIds) .with_container_encoding(ContainerEncoding::LengthPrefixed) - .with_symbol_value_encoding(SymbolValueEncoding::WriteAsSymbolIds) + .with_symbol_value_encoding(SymbolValueEncoding::SymbolIds) } } impl Encoding for BinaryEncoding_1_1 { @@ -206,10 +206,10 @@ impl Encoding for TextEncoding_1_0 { } fn default_value_writer_config() -> ValueWriterConfig { ValueWriterConfig::text() - .with_field_name_encoding(FieldNameEncoding::WriteAsInlineText) - .with_annotations_encoding(AnnotationsEncoding::WriteAsInlineText) + .with_field_name_encoding(FieldNameEncoding::InlineText) + .with_annotations_encoding(AnnotationsEncoding::InlineText) .with_container_encoding(ContainerEncoding::Delimited) - .with_symbol_value_encoding(SymbolValueEncoding::WriteAsInlineText) + .with_symbol_value_encoding(SymbolValueEncoding::InlineText) } } impl Encoding for TextEncoding_1_1 { diff --git a/src/lib.rs b/src/lib.rs index 3ebdcf9b..e5e957fc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -211,7 +211,7 @@ macro_rules! v1_x_reader_writer { lazy::value_ref::ValueRef, lazy::r#struct::{LazyStruct, LazyField}, lazy::sequence::{LazyList, LazySExp}, - lazy::encoder::value_writer::{ValueWriter, ContextWriter, StructWriter, SequenceWriter, EExpWriter}, + lazy::encoder::value_writer::{AnnotatableWriter, ValueWriter, ContextWriter, StructWriter, SequenceWriter, EExpWriter}, lazy::any_encoding::IonEncoding, lazy::expanded::compiler::TemplateCompiler, lazy::expanded::template::TemplateMacro, diff --git a/src/symbol_table.rs b/src/symbol_table.rs index 05a805d0..8f256e99 100644 --- a/src/symbol_table.rs +++ b/src/symbol_table.rs @@ -204,7 +204,7 @@ impl SymbolTable { } /// If defined, returns the Symbol ID associated with the provided text. - pub fn sid_for>(&self, text: &A) -> Option { + pub fn sid_for>(&self, text: A) -> Option { self.ids_by_text.get(text.as_ref()).copied() } diff --git a/tests/conformance_dsl/fragment.rs b/tests/conformance_dsl/fragment.rs index 50248ae6..a8f5165e 100644 --- a/tests/conformance_dsl/fragment.rs +++ b/tests/conformance_dsl/fragment.rs @@ -283,7 +283,7 @@ impl PartialEq> for ProxyElement<'_ let shared_symbol_txt = shared_symbol.text().unwrap_or(""); let shared_id = symbol_table - .sid_for(&shared_symbol_txt) + .sid_for(shared_symbol_txt) .unwrap_or(0); actual_field.matches_sid_or_text( shared_id, diff --git a/tests/conformance_dsl/model.rs b/tests/conformance_dsl/model.rs index 74fd48ae..4d49de41 100644 --- a/tests/conformance_dsl/model.rs +++ b/tests/conformance_dsl/model.rs @@ -365,7 +365,7 @@ pub(crate) fn compare_values( let shared_text = shared_symbol.text().unwrap_or(""); ( shared_text.to_string(), - other.symbol_table().sid_for(&shared_text).unwrap_or(0), + other.symbol_table().sid_for(shared_text).unwrap_or(0), ) } }