Skip to content

Commit

Permalink
Makes AnnotatableWriter and writer's symbol table experimentally pub (#889)
Browse files Browse the repository at this point in the history

* Makes Writer symbol table experimentally pub
* Makes AnnotatableWriter experimentally pub
* Clippy suggestions RE: variant name prefixes
  • Loading branch information
zslayton authored Dec 28, 2024
1 parent 4e0d272 commit 75b0763
Show file tree
Hide file tree
Showing 11 changed files with 116 additions and 52 deletions.
8 changes: 7 additions & 1 deletion src/element/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use std::cmp::Ordering;
use std::fmt::{Display, Formatter};
use std::io;

use crate::{ion_data, Decimal, Int, IonResult, IonType, Str, Symbol, Timestamp};
use crate::{ion_data, Decimal, Int, IonResult, IonType, Str, Symbol, SymbolRef, Timestamp};
use crate::{Blob, Bytes, Clob, List, SExp, Struct};
// Re-export the Value variant types and traits so they can be accessed directly from this module.
use crate::element::builders::{SequenceBuilder, StructBuilder};
Expand Down Expand Up @@ -251,6 +251,12 @@ impl From<Symbol> for Value {
}
}

impl From<SymbolRef<'_>> for Value {
fn from(sym_val: SymbolRef<'_>) -> Self {
Value::Symbol(sym_val.to_owned())
}
}

impl From<&[u8]> for Value {
fn from(value: &[u8]) -> Self {
Value::Blob(value.into())
Expand Down
1 change: 0 additions & 1 deletion src/lazy/any_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,6 @@ pub struct LazyRawAnyValue<'top> {

impl<'top> LazyRawAnyValue<'top> {
/// Returns an enum indicating the encoding that backs this lazy value.
#[cfg(feature = "experimental-tooling-apis")]
pub fn kind(&self) -> LazyRawValueKind<'top> {
self.encoding
}
Expand Down
2 changes: 1 addition & 1 deletion src/lazy/encoder/binary/v1_1/value_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ impl BinaryValueWriter_1_1<'_, '_> {
pub fn with_inline_symbol_text(mut self) -> Self {
self.value_writer_config = self
.value_writer_config
.with_symbol_value_encoding(SymbolValueEncoding::WriteAsInlineText);
.with_symbol_value_encoding(SymbolValueEncoding::InlineText);
self
}

Expand Down
34 changes: 34 additions & 0 deletions src/lazy/encoder/value_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -481,3 +481,37 @@ pub trait SequenceWriter: MakeValueWriter {
Ok(self)
}
}

#[cfg(all(test, feature = "experimental-reader-writer"))]
mod tests {
    use crate::symbol_ref::AsSymbolRef;
    use crate::{ion_seq, v1_0, Element, IntoAnnotatedElement, SequenceWriter, Writer};
    use crate::{AnnotatableWriter, IonResult, ValueWriter};

    /// Looks up a symbol ID through a value writer's symbol table, then reuses that ID both as
    /// a symbol value and as an annotation, and checks that the data reads back as `name` text.
    #[test]
    fn save_and_reuse_symbol_id() -> IonResult<()> {
        let mut writer = Writer::new(v1_0::Binary, vec![])?;

        // Resolve the ID once up front. The `unwrap` relies on "name" already being present in
        // a newly created writer's symbol table.
        let name_sid = writer
            .value_writer()
            .symbol_table()
            .sid_for("name")
            .unwrap();

        // Write the symbol twice using its ID.
        writer.write_symbol(name_sid)?;
        writer.write_symbol(name_sid)?;

        // Write it a third time, this time also using the ID as the value's annotation.
        writer
            .value_writer()
            .with_annotations(name_sid)?
            .write_symbol(name_sid)?;

        let encoded = writer.close()?;
        let decoded = Element::read_all(&encoded)?;

        let expected_values = ion_seq!(
            "name".as_symbol_ref()
            "name".as_symbol_ref()
            "name".as_symbol_ref().with_annotations(["name"])
        );
        assert_eq!(decoded, expected_values);
        Ok(())
    }
}
31 changes: 16 additions & 15 deletions src/lazy/encoder/value_writer_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ pub enum ContainerEncoding {
pub enum SymbolValueEncoding {
/// Add all symbol values to the symbol table and encode them as symbol IDs.
#[default]
WriteAsSymbolIds,
SymbolIds,
/// Do not add symbol values to the symbol table; write their text inline.
/// Symbol values specified as symbol IDs will not be mapped to text.
WriteAsInlineText,
InlineText,
/// If a symbol value is already in the symbol table, encode it as a symbol ID.
/// If it is not already in the symbol table, encode its text inline.
WriteNewSymbolsAsInlineText,
NewSymbolsAsInlineText,
}

/// Configuration options for encoding an annotations sequence.
Expand All @@ -53,48 +53,49 @@ pub enum SymbolValueEncoding {
pub enum AnnotationsEncoding {
/// Add all annotations to the symbol table and encode them as symbol IDs.
#[default]
WriteAsSymbolIds,
SymbolIds,
/// Do not add annotations to the symbol table; write their text inline.
/// Annotations specified as symbol IDs will not be mapped to text.
WriteAsInlineText,
InlineText,
/// If an annotation is already in the symbol table, encode it as a symbol ID.
/// If it is not already in the symbol table, encode its text inline.
WriteNewSymbolsAsInlineText,
NewSymbolsAsInlineText,
}

/// Configuration options for encoding a struct field name.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Default)]
#[non_exhaustive]
#[allow(clippy::enum_variant_names)]
pub enum FieldNameEncoding {
/// Add all field names to the symbol table and encode them as symbol IDs.
#[default]
WriteAsSymbolIds,
SymbolIds,
/// Do not add field names to the symbol table; write their text inline.
/// Field names specified as symbol IDs will not be mapped to text.
WriteAsInlineText,
InlineText,
/// If a field name is already in the symbol table, encode it as a symbol ID.
/// If it is not already in the symbol table, encode its text inline.
WriteNewSymbolsAsInlineText,
NewSymbolsAsInlineText,
}

impl ValueWriterConfig {
/// Constructs a `ValueWriterConfig` that writes all symbol tokens as inline text.
pub const fn text() -> Self {
ValueWriterConfig {
container_encoding: ContainerEncoding::Delimited,
symbol_value_encoding: SymbolValueEncoding::WriteAsInlineText,
annotations_encoding: AnnotationsEncoding::WriteAsInlineText,
field_name_encoding: FieldNameEncoding::WriteAsInlineText,
symbol_value_encoding: SymbolValueEncoding::InlineText,
annotations_encoding: AnnotationsEncoding::InlineText,
field_name_encoding: FieldNameEncoding::InlineText,
}
}

/// Constructs a `ValueWriterConfig` that writes all symbol tokens as symbol IDs.
pub const fn binary() -> Self {
ValueWriterConfig {
container_encoding: ContainerEncoding::LengthPrefixed,
symbol_value_encoding: SymbolValueEncoding::WriteAsSymbolIds,
annotations_encoding: AnnotationsEncoding::WriteAsSymbolIds,
field_name_encoding: FieldNameEncoding::WriteAsSymbolIds,
symbol_value_encoding: SymbolValueEncoding::SymbolIds,
annotations_encoding: AnnotationsEncoding::SymbolIds,
field_name_encoding: FieldNameEncoding::SymbolIds,
}
}

Expand Down
72 changes: 48 additions & 24 deletions src/lazy/encoder/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,18 @@ impl<E: Encoding, Output: Write> Writer<E, Output> {
Ok(self.output)
}

/// Returns a shared reference to the symbol table stored in this writer's context.
///
/// This definition is compiled when the `experimental-reader-writer` feature is enabled,
/// in which case the accessor is `pub`.
#[cfg(feature = "experimental-reader-writer")]
#[inline]
pub fn symbol_table(&self) -> &SymbolTable {
&self.context.symbol_table
}

/// Returns a shared reference to the symbol table stored in this writer's context.
///
/// This definition is compiled when the `experimental-reader-writer` feature is disabled;
/// the accessor is then visible only within the crate.
#[cfg(not(feature = "experimental-reader-writer"))]
#[inline]
pub(crate) fn symbol_table(&self) -> &SymbolTable {
&self.context.symbol_table
}

/// Helper method to encode an LST append containing pending symbols.
fn write_lst_append(&mut self) -> IonResult<()> {
let Self {
Expand Down Expand Up @@ -235,9 +247,21 @@ impl<'a, V: ValueWriter> ApplicationValueWriter<'a, V> {
}
}

fn symbol_table(&mut self) -> &mut SymbolTable {
fn symbol_table_mut(&mut self) -> &mut SymbolTable {
&mut self.encoding.symbol_table
}

/// Returns a shared reference to the symbol table held by this value writer's `encoding` field.
///
/// This definition is compiled when the `experimental-reader-writer` feature is enabled,
/// in which case the accessor is `pub`.
#[cfg(feature = "experimental-reader-writer")]
#[inline]
pub fn symbol_table(&self) -> &SymbolTable {
&self.encoding.symbol_table
}

/// Returns a shared reference to the symbol table held by this value writer's `encoding` field.
///
/// This definition is compiled when the `experimental-reader-writer` feature is disabled;
/// the accessor is then visible only within the crate.
#[cfg(not(feature = "experimental-reader-writer"))]
#[inline]
pub(crate) fn symbol_table(&self) -> &SymbolTable {
&self.encoding.symbol_table
}
}

impl ApplicationValueWriter<'_, BinaryValueWriter_1_1<'_, '_>> {
Expand Down Expand Up @@ -281,15 +305,15 @@ impl<V: ValueWriter> AnnotatableWriter for ApplicationValueWriter<'_, V> {
{
let mut annotations = annotations.into_annotations_vec();
match self.value_writer_config.annotations_encoding() {
AnnotationsEncoding::WriteAsSymbolIds => {
AnnotationsEncoding::SymbolIds => {
// Intern all text so everything we write is a symbol ID
self.intern_all_annotations(&mut annotations)?
}
AnnotationsEncoding::WriteAsInlineText => {
AnnotationsEncoding::InlineText => {
// Validate the symbol IDs, write the text as-is
self.validate_all_symbol_ids(&mut annotations)?
}
AnnotationsEncoding::WriteNewSymbolsAsInlineText => {
AnnotationsEncoding::NewSymbolsAsInlineText => {
// Map all known strings to symbol IDs, leave new text as is.
self.map_known_symbols_to_symbol_ids(&mut annotations)?
}
Expand Down Expand Up @@ -326,15 +350,15 @@ impl<V: ValueWriter> ApplicationValueWriter<'_, V> {
}
// The token is text...
RawSymbolRef::Text(text) => {
let sid = match self.symbol_table().sid_for(&text) {
let sid = match self.symbol_table().sid_for(text) {
Some(sid) => {
//...that was already in the symbol table.
sid
}
None => {
// ...that we need to add to the symbol table.
self.encoding.num_pending_symbols += 1;
self.symbol_table().add_symbol_for_text(text)
self.symbol_table_mut().add_symbol_for_text(text)
}
};
*annotation = RawSymbolRef::SymbolId(sid);
Expand Down Expand Up @@ -389,7 +413,7 @@ impl<V: ValueWriter> ApplicationValueWriter<'_, V> {
}
// The token is text...
RawSymbolRef::Text(text) => {
match self.symbol_table().sid_for(&text) {
match self.symbol_table_mut().sid_for(text) {
Some(sid) => {
//...that was already in the symbol table.
*annotation = RawSymbolRef::SymbolId(sid);
Expand Down Expand Up @@ -452,9 +476,9 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> {
SystemSymbol_1_1(symbol) => SystemSymbol_1_1(symbol),
Text(text) => {
match value_writer_config.symbol_value_encoding() {
WriteAsSymbolIds => {
SymbolIds => {
// Map the text to a symbol ID.
match encoding.symbol_table.sid_for(&text) {
match encoding.symbol_table.sid_for(text) {
// If it's already in the symbol table, use that SID.
Some(symbol_id) => SymbolId(symbol_id),
// Otherwise, add it to the symbol table.
Expand All @@ -464,15 +488,15 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> {
}
}
}
WriteNewSymbolsAsInlineText => {
NewSymbolsAsInlineText => {
// If the text is in the symbol table, use the symbol ID. Otherwise, use the text itself.
match encoding.symbol_table.sid_for(&text) {
match encoding.symbol_table.sid_for(text) {
Some(symbol_id) => SymbolId(symbol_id),
None => Text(text),
}
}
// We have text and we want to write text. Nothing to do.
WriteAsInlineText => Text(text),
InlineText => Text(text),
}
}
};
Expand Down Expand Up @@ -585,18 +609,18 @@ impl<V: ValueWriter> FieldEncoder for ApplicationStructWriter<'_, V> {
// From here on, we're dealing with text.

// If the struct writer is configured to write field names as text, do that.
if self.value_writer_config.field_name_encoding() == FieldNameEncoding::WriteAsInlineText {
if self.value_writer_config.field_name_encoding() == FieldNameEncoding::InlineText {
return self.raw_struct_writer.encode_field_name(text);
}

// Otherwise, see if the symbol is already in the symbol table.
let token: RawSymbolRef<'_> = match self.encoding.symbol_table.sid_for(&text) {
let token: RawSymbolRef<'_> = match self.encoding.symbol_table.sid_for(text) {
// If so, use the existing ID.
Some(sid) => sid.into(),
// If it's not but the struct writer is configured to intern new text, add it to the
// symbol table.
None if self.value_writer_config.field_name_encoding()
== FieldNameEncoding::WriteAsSymbolIds =>
== FieldNameEncoding::SymbolIds =>
{
self.encoding.num_pending_symbols += 1;
self.encoding.symbol_table.add_symbol_for_text(text).into()
Expand Down Expand Up @@ -828,7 +852,7 @@ mod tests {
fn intern_new_symbol_values() -> IonResult<()> {
use RawSymbolRef::*;
symbol_value_encoding_test(
SymbolValueEncoding::WriteAsSymbolIds,
SymbolValueEncoding::SymbolIds,
[
(Text("$ion_symbol_table"), &[0xE1, 0x03]),
(Text("name"), &[0xE1, 0x04]),
Expand All @@ -842,7 +866,7 @@ mod tests {
fn do_not_intern_new_symbol_values() -> IonResult<()> {
use RawSymbolRef::*;
symbol_value_encoding_test(
SymbolValueEncoding::WriteNewSymbolsAsInlineText,
SymbolValueEncoding::NewSymbolsAsInlineText,
[
// Known text symbols are written as SIDs
(Text("$ion_symbol_table"), &[0xE1, 0x03]),
Expand All @@ -860,7 +884,7 @@ mod tests {
fn encode_all_text_as_is() -> IonResult<()> {
use RawSymbolRef::*;
symbol_value_encoding_test(
SymbolValueEncoding::WriteAsInlineText,
SymbolValueEncoding::InlineText,
[
// Known text symbols are written as inline text
(Text("name"), &[0xA4, 0x6E, 0x61, 0x6D, 0x65]),
Expand Down Expand Up @@ -903,7 +927,7 @@ mod tests {
fn intern_new_annotations() -> IonResult<()> {
use RawSymbolRef::*;
annotations_sequence_encoding_test(
AnnotationsEncoding::WriteAsSymbolIds,
AnnotationsEncoding::SymbolIds,
&[
Text("$ion_symbol_table"),
Text("name"),
Expand All @@ -925,7 +949,7 @@ mod tests {
fn write_new_annotations_as_text() -> IonResult<()> {
use RawSymbolRef::*;
annotations_sequence_encoding_test(
AnnotationsEncoding::WriteNewSymbolsAsInlineText,
AnnotationsEncoding::NewSymbolsAsInlineText,
&[
Text("$ion_symbol_table"),
Text("name"),
Expand All @@ -950,7 +974,7 @@ mod tests {
fn write_text_annotations_as_is() -> IonResult<()> {
use RawSymbolRef::*;
annotations_sequence_encoding_test(
AnnotationsEncoding::WriteAsInlineText,
AnnotationsEncoding::InlineText,
&[Text("name"), SymbolId(6), Text("foo")],
&[
0xE9, // Opcode: FlexUInt follows with byte length of sequence
Expand Down Expand Up @@ -1007,7 +1031,7 @@ mod tests {
#[test]
fn intern_all_field_names() -> IonResult<()> {
struct_field_encoding_test(
FieldNameEncoding::WriteAsSymbolIds,
FieldNameEncoding::SymbolIds,
&[
// New symbols
(RawSymbolRef::Text("foo"), &[0x81]), // FlexUInt SID $64,
Expand All @@ -1023,7 +1047,7 @@ mod tests {
#[test]
fn write_all_field_names_as_text() -> IonResult<()> {
struct_field_encoding_test(
FieldNameEncoding::WriteAsInlineText,
FieldNameEncoding::InlineText,
&[
// New symbols
(RawSymbolRef::Text("foo"), &[0xFB, 0x66, 0x6F, 0x6F]), // FlexSym -3, "foo"
Expand All @@ -1038,7 +1062,7 @@ mod tests {
#[test]
fn write_new_field_names_as_text() -> IonResult<()> {
struct_field_encoding_test(
FieldNameEncoding::WriteNewSymbolsAsInlineText,
FieldNameEncoding::NewSymbolsAsInlineText,
&[
// New symbols
(RawSymbolRef::Text("foo"), &[0xFB, 0x66, 0x6F, 0x6F]), // FlexSym -3, "foo"
Expand Down
Loading

0 comments on commit 75b0763

Please sign in to comment.