diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 15fd29c..3e5f48a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -22,6 +22,7 @@ jobs:
           - {name: ciborium-ll}
           - {name: ciborium}
           - {name: ciborium, feat: std}
+          - {name: ciborium, feat: canonical}
           - {name: ciborium-io}
           - {name: ciborium-io, feat: alloc}
          - {name: ciborium-io, feat: std}
diff --git a/ciborium/Cargo.toml b/ciborium/Cargo.toml
index e49a7dc..acb54d4 100644
--- a/ciborium/Cargo.toml
+++ b/ciborium/Cargo.toml
@@ -34,6 +34,7 @@ hex = "0.4"
 [features]
 default = ["std"]
 std = ["ciborium-io/std", "serde/std"]
+canonical = ["std"]
 
 [package.metadata.docs.rs]
 all-features = true
diff --git a/ciborium/README.md b/ciborium/README.md
index b60c35e..1549c28 100644
--- a/ciborium/README.md
+++ b/ciborium/README.md
@@ -18,6 +18,13 @@ passed to these functions just as streams can.
 
 For dynamic CBOR value creation/inspection, see [`Value`](crate::value::Value).
 
+## Features
+- `std`: enabled by default.
+- `canonical`: allows serializing with a `CanonicalizationScheme` for deterministic
+  outputs. Incurs a small performance penalty (~20% slower) when serializing
+  without a canonicalization scheme, and a large penalty (~100% slower) when
+  serializing with a canonicalization scheme.
+
 ## Design Decisions
 
 ### Always Serialize Numeric Values to the Smallest Size
diff --git a/ciborium/src/lib.rs b/ciborium/src/lib.rs
index f0edf0c..3852212 100644
--- a/ciborium/src/lib.rs
+++ b/ciborium/src/lib.rs
@@ -13,6 +13,13 @@
 //!
 //! For dynamic CBOR value creation/inspection, see [`Value`](crate::value::Value).
 //!
+//! # Features
+//! - `std`: enabled by default.
+//! - `canonical`: allows serializing with a `CanonicalizationScheme` for deterministic
+//!   outputs. Incurs a small performance penalty (~20% slower) when serializing
+//!   without a canonicalization scheme, and a large penalty (~100% slower) when
+//!   serializing with a canonicalization scheme.
+//!
 //! # Design Decisions
 //!
 //! ## Always Serialize Numeric Values to the Smallest Size
@@ -106,7 +113,16 @@ pub use crate::ser::{into_writer, Serializer};
 
 #[doc(inline)]
 #[cfg(feature = "std")]
-pub use crate::ser::{into_writer_canonical, to_vec, to_vec_canonical};
+pub use crate::ser::to_vec;
+
+#[doc(inline)]
+#[cfg(feature = "canonical")]
+pub use crate::ser::{into_writer_canonical, to_vec_canonical};
+
+#[cfg(feature = "std")]
+#[doc(inline)]
+#[deprecated(since = "0.3.0", note = "Please use `to_vec` instead")]
+pub use crate::ser::to_vec as into_vec;
 
 #[doc(inline)]
 pub use crate::value::Value;
diff --git a/ciborium/src/ser/mod.rs b/ciborium/src/ser/mod.rs
index 9faf7f7..ff6705a 100644
--- a/ciborium/src/ser/mod.rs
+++ b/ciborium/src/ser/mod.rs
@@ -9,42 +9,28 @@ pub use error::Error;
 use alloc::string::ToString;
 use ciborium_io::Write;
 use ciborium_ll::*;
-use serde::{ser, Serialize as _};
+use serde::ser;
 
 /// Which canonicalization scheme to use for CBOR serialization.
+///
+/// Variants can only be constructed with the `canonical` feature enabled.
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub enum CanonicalizationScheme {
-    /// No canonicalization, don't sort map keys. Faster and reduces allocations.
-    None,
-
     /// Sort map keys in output according to [RFC 7049]'s deterministic encoding spec.
     ///
     /// Also aligns with [RFC 8949 4.2.3]'s backwards compatibility sort order.
     ///
     /// Uses length-first map key ordering. Eg. `["a", "b", "aa"]`.
-    #[cfg(feature = "std")]
+    #[cfg(feature = "canonical")]
     Rfc7049,
 
     /// Sort map keys in output according to [RFC 8949]'s deterministic encoding spec.
     ///
     /// Uses bytewise lexicographic map key ordering. Eg. `["a", "aa", "b"]`.
-    #[cfg(feature = "std")]
+    #[cfg(feature = "canonical")]
     Rfc8049,
 }
 
-impl CanonicalizationScheme {
-    /// Does this canonicalization scheme require sorting of keys.
-    pub fn is_sorting(&self) -> bool {
-        #[cfg(feature = "std")] {
-            matches!(self, Self::Rfc7049 | Self::Rfc8049)
-        }
-
-        #[cfg(not(feature = "std"))] {
-            false
-        }
-    }
-}
-
 /// A serializer for CBOR.
 ///
 /// # Example
@@ -63,14 +49,14 @@ impl CanonicalizationScheme {
 ///
 /// let mut buffer = Vec::with_capacity(1024);
 ///
-/// #[cfg(feature = "std")] {
-///     let mut serializer = Serializer::new(&mut buffer, ciborium::ser::CanonicalizationScheme::Rfc8049);
+/// #[cfg(feature = "canonical")] {
+///     let mut serializer = Serializer::new(&mut buffer, Some(ciborium::ser::CanonicalizationScheme::Rfc8049));
 ///     example.serialize(&mut serializer).unwrap();
 ///     assert_eq!(hex::encode(&buffer), "a36161182a61621910686261611901a4");
 /// }
 ///
-/// #[cfg(not(feature = "std"))] {
-///     let mut serializer = Serializer::from(&mut buffer); // uses no canonicalization
+/// #[cfg(not(feature = "canonical"))] {
+///     let mut serializer = Serializer::new(&mut buffer, None); // uses no canonicalization
 ///     example.serialize(&mut serializer).unwrap();
 ///     assert_eq!(hex::encode(&buffer), "a36161182a6261611901a46162191068");
 /// }
 /// ```
@@ -80,7 +66,7 @@ pub struct Serializer<W> {
 
     /// Whether to canonically sort map keys in output according a particular
     /// [CanonicalizationScheme] map key sort ordering.
-    canonicalization: CanonicalizationScheme,
+    canonicalization: Option<CanonicalizationScheme>,
 }
 
 impl<W> Serializer<W> {
     /// Create a new CBOR serializer.
     ///
     /// `canonicalization` can be used to change the [CanonicalizationScheme] used for sorting
     /// output map and struct keys to ensure deterministic outputs.
-    pub fn new(encoder: impl Into<Encoder<W>>, canonicalization: CanonicalizationScheme) -> Self {
+    #[inline]
+    pub fn new(
+        encoder: impl Into<Encoder<W>>,
+        canonicalization: Option<CanonicalizationScheme>,
+    ) -> Self {
         Self {
             encoder: encoder.into(),
             canonicalization,
@@ -101,8 +91,7 @@ impl<W: Write> From<W> for Serializer<W> {
     fn from(writer: W) -> Self {
         Self {
             encoder: writer.into(),
-            #[cfg(feature = "std")]
-            canonicalization: CanonicalizationScheme::None,
+            canonicalization: None,
         }
     }
 }
@@ -112,8 +101,7 @@ impl<W: Write> From<Encoder<W>> for Serializer<W> {
     fn from(writer: Encoder<W>) -> Self {
         Self {
             encoder: writer,
-            #[cfg(feature = "std")]
-            canonicalization: CanonicalizationScheme::None,
+            canonicalization: None,
        }
     }
 }
@@ -340,7 +328,9 @@ where
         length: usize,
     ) -> Result<Self::SerializeTupleVariant, Self::Error> {
         match (name, variant) {
-            ("@@TAG@@", "@@TAGGED@@") => CollectionSerializer::new(self, CollectionType::Tag, Some(length)),
+            ("@@TAG@@", "@@TAGGED@@") => {
+                CollectionSerializer::new(self, CollectionType::Tag, Some(length))
+            }
 
             _ => {
                 self.encoder.push(Header::Map(Some(1)))?;
@@ -385,19 +375,30 @@
 macro_rules! end {
     () => {
+        #[allow(unused_mut)]
         #[inline]
         fn end(mut self) -> Result<(), Self::Error> {
-            if self.serializer.canonicalization.is_sorting() {
-                // Canonical serialization holds back writing headers, as it doesn't allow
-                // indefinite length structs. This allows us to always compute the length.
-                self.push_header(Some(self.cache_values.len()))?;
+            match self.serializer.canonicalization {
+                None => {
+                    if self.length.is_none() {
+                        // Not canonical and no length => indefinite length break.
+                        self.serializer.encoder.push(Header::Break)?;
+                    }
+                }
+
+                #[cfg(not(feature = "canonical"))]
+                Some(_) => {}
 
-                for value in self.cache_values.iter() {
-                    self.serializer.encoder.write_all(&value)?;
+                #[cfg(feature = "canonical")]
+                Some(_scheme) => {
+                    // Canonical serialization holds back writing headers, as it doesn't allow
+                    // indefinite length structs. This allows us to always compute the length.
+                    self.push_header(Some(self.cache_values.len()))?;
+
+                    for value in self.cache_values.iter() {
+                        self.serializer.encoder.write_all(&value)?;
+                    }
                 }
-            } else if self.length.is_none() {
-                // Not canonical and no length => indefinite length break.
-                self.serializer.encoder.push(Header::Break)?;
             }
 
             Ok(())
         }
     };
 }
 
@@ -407,67 +408,73 @@
 macro_rules! end_map {
     () => {
+        #[allow(unused_mut)]
         #[inline]
         fn end(mut self) -> Result<(), Self::Error> {
-            if self.serializer.canonicalization.is_sorting() {
-                // Canonical serialization holds back writing headers, as it doesn't allow
-                // indefinite length structs. This allows us to always compute the length.
-                self.push_header(Some(self.cache_keys.len()))?;
-            }
-
-            // Sort our cached output and write it to the encoder.
             match self.serializer.canonicalization {
-                CanonicalizationScheme::None => {}
-                #[cfg(feature = "std")]
-                CanonicalizationScheme::Rfc8049 => {
-                    // keys get sorted in lexicographical byte order
-                    let keys = self.cache_keys;
-                    let values = self.cache_values;
-
-                    debug_assert_eq!(
-                        keys.len(), values.len(),
-                        "ciborium error: canonicalization failed, different number of keys and values?");
-
-                    let mut pairs: Vec<_> =
-                        keys.iter().zip(values.iter()).collect();
-
-                    pairs.sort();
-
-                    for (key, value) in pairs.iter() {
-                        self.serializer.encoder.write_all(&key)?;
-                        self.serializer.encoder.write_all(&value)?;
+                None => {
+                    if self.length.is_none() {
+                        // Not canonical and no length => indefinite length break.
+                        self.serializer.encoder.push(Header::Break)?;
                     }
                 }
-                #[cfg(feature = "std")]
-                CanonicalizationScheme::Rfc7049 => {
-                    // keys get sorted in length-first byte order
-                    let keys = self.cache_keys;
-                    let values = self.cache_values;
-
-                    debug_assert_eq!(
-                        keys.len(), values.len(),
-                        "ciborium error: canonicalization failed, different number of keys and values?");
-
-                    let mut pairs: Vec<_> =
-                        keys.iter()
-                            .map(|key| (key.len(), key)) // length-first ordering
-                            .zip(values.iter())
-                            .collect();
-
-                    pairs.sort();
-
-                    for ((_, key), value) in pairs.iter() {
-                        self.serializer.encoder.write_all(&key)?;
-                        self.serializer.encoder.write_all(&value)?;
+
+                #[cfg(not(feature = "canonical"))]
+                Some(_) => unreachable!(),
+
+                #[cfg(feature = "canonical")]
+                Some(scheme) => {
+                    // Canonical serialization holds back writing headers, as it doesn't allow
+                    // indefinite length structs. This allows us to always compute the length.
+                    self.push_header(Some(self.cache_keys.len()))?;
+
+                    // Sort our cached output and write it to the encoder.
+                    match scheme {
+                        CanonicalizationScheme::Rfc8049 => {
+                            // keys get sorted in lexicographical byte order
+                            let keys = self.cache_keys;
+                            let values = self.cache_values;
+
+                            debug_assert_eq!(
+                                keys.len(), values.len(),
+                                "ciborium error: canonicalization failed, different number of keys and values?");
+
+                            let mut pairs: Vec<_> =
+                                keys.iter().zip(values.iter()).collect();
+
+                            pairs.sort();
+
+                            for (key, value) in pairs.iter() {
+                                self.serializer.encoder.write_all(&key)?;
+                                self.serializer.encoder.write_all(&value)?;
+                            }
+                        }
+                        CanonicalizationScheme::Rfc7049 => {
+                            // keys get sorted in length-first byte order
+                            let keys = self.cache_keys;
+                            let values = self.cache_values;
+
+                            debug_assert_eq!(
+                                keys.len(), values.len(),
+                                "ciborium error: canonicalization failed, different number of keys and values?");
+
+                            let mut pairs: Vec<_> =
+                                keys.iter()
+                                    .map(|key| (key.len(), key)) // length-first ordering
+                                    .zip(values.iter())
+                                    .collect();
+
+                            pairs.sort();
+
+                            for ((_, key), value) in pairs.iter() {
+                                self.serializer.encoder.write_all(&key)?;
+                                self.serializer.encoder.write_all(&value)?;
+                            }
+                        }
+                    }
+                }
             }
 
-            if self.length.is_none() && !self.serializer.canonicalization.is_sorting() {
-                // Not canonical and no length => indefinite length break.
-                self.serializer.encoder.push(Header::Break)?;
-            }
-
             Ok(())
         }
     };
 }
 
@@ -498,9 +505,9 @@ pub struct CollectionSerializer<'a, W> {
     /// been written yet. Only relevant for tag collections.
     tag_written: bool,
 
-    #[cfg(feature = "std")]
+    #[cfg(feature = "canonical")]
     cache_keys: Vec<Vec<u8>>,
-    #[cfg(feature = "std")]
+    #[cfg(feature = "canonical")]
     cache_values: Vec<Vec<u8>>,
 }
 
@@ -508,7 +515,7 @@ impl<'a, W: Write> CollectionSerializer<'a, W>
 where
     W::Error: core::fmt::Debug,
 {
-    #[inline]
+    #[inline(always)]
     fn new(
         serializer: &'a mut Serializer<W>,
         collection_type: CollectionType,
         length: Option<usize>,
@@ -519,33 +526,72 @@ where
             collection_type,
             length,
             tag_written: false,
-            #[cfg(feature = "std")]
-            cache_keys: Vec::with_capacity(0),
-            #[cfg(feature = "std")]
-            cache_values: Vec::with_capacity(0),
+            #[cfg(feature = "canonical")]
+            cache_keys: Vec::new(),
+            #[cfg(feature = "canonical")]
+            cache_values: Vec::new(),
         };
 
-        if !collection_serializer.serializer.canonicalization.is_sorting() {
+        if collection_serializer.serializer.canonicalization.is_none() {
             collection_serializer.push_header(length)?;
         }
 
         Ok(collection_serializer)
     }
 
-    #[inline]
+    #[inline(always)]
     fn push_header(&mut self, length: Option<usize>) -> Result<(), Error<W::Error>> {
         match self.collection_type {
-            CollectionType::Array => {
-                Ok(self.serializer.encoder.push(Header::Array(length))?)
-            }
-            CollectionType::Map => {
-                Ok(self.serializer.encoder.push(Header::Map(length))?)
-            }
+            CollectionType::Array => Ok(self.serializer.encoder.push(Header::Array(length))?),
+            CollectionType::Map => Ok(self.serializer.encoder.push(Header::Map(length))?),
 
             // tag headers are always written directly in SerializeTupleVariant::serialize_field
             // as they don't contain a potentially unknown length
             CollectionType::Tag => Ok(()),
         }
     }
+
+    #[inline(always)]
+    fn inline_serialize_key<U: ?Sized + ser::Serialize>(
+        &mut self,
+        key: &U,
+    ) -> Result<(), Error<W::Error>> {
+        match self.serializer.canonicalization {
+            None => key.serialize(&mut *self.serializer),
+
+            #[cfg(not(feature = "canonical"))]
+            Some(_) => unreachable!(),
+
+            #[cfg(feature = "canonical")]
+            Some(_) => {
+                let key_bytes = to_vec_small(key, self.serializer.canonicalization)
+                    .map_err(|e| Error::Value(e.to_string()))?;
+                self.cache_keys.push(key_bytes);
+                Ok(())
+            }
+        }
+    }
+
+    #[inline(always)]
+    fn inline_serialize_value<U: ?Sized + ser::Serialize>(
+        &mut self,
+        value: &U,
+    ) -> Result<(), Error<W::Error>> {
+        match self.serializer.canonicalization {
+            None => value.serialize(&mut *self.serializer),
+
+            #[cfg(not(feature = "canonical"))]
+            Some(_) => unreachable!(),
+
+            #[cfg(feature = "canonical")]
+            Some(_) => {
+                // use to_vec_canonical, we expect values to be bigger than keys
+                let value_bytes = to_vec_canonical(value, self.serializer.canonicalization)
+                    .map_err(|e| Error::Value(e.to_string()))?;
+                self.cache_values.push(value_bytes);
+                Ok(())
+            }
+        }
+    }
 }
 
 impl<'a, W: Write> ser::SerializeSeq for CollectionSerializer<'a, W>
 where
     W::Error: core::fmt::Debug,
 {
@@ -560,15 +606,7 @@ where
         &mut self,
         value: &U,
     ) -> Result<(), Self::Error> {
-        #[cfg(feature = "std")]
-        if self.serializer.canonicalization.is_sorting() {
-            let value_bytes = to_vec_small(value, self.serializer.canonicalization)
-                .map_err(|e| Error::Value(e.to_string()))?;
-            self.cache_values.push(value_bytes);
-            return Ok(());
-        }
-
-        value.serialize(&mut *self.serializer)
+        self.inline_serialize_value(value)
     }
 
     end!();
@@ -586,15 +624,7 @@ where
         &mut self,
         value: &U,
     ) -> Result<(), Self::Error> {
-        #[cfg(feature = "std")]
-        if self.serializer.canonicalization.is_sorting() {
-            let value_bytes = to_vec_small(value, self.serializer.canonicalization)
-                .map_err(|e| Error::Value(e.to_string()))?;
-            self.cache_values.push(value_bytes);
-            return Ok(());
-        }
-
-        value.serialize(&mut *self.serializer)
+        self.inline_serialize_value(value)
     }
 
     end!();
@@ -612,15 +642,7 @@ where
         &mut self,
         value: &U,
     ) -> Result<(), Self::Error> {
-        #[cfg(feature = "std")]
-        if self.serializer.canonicalization.is_sorting() {
-            let value_bytes = to_vec_small(value, self.serializer.canonicalization)
-                .map_err(|e| Error::Value(e.to_string()))?;
-            self.cache_values.push(value_bytes);
-            return Ok(());
-        }
-
-        value.serialize(&mut *self.serializer)
+        self.inline_serialize_value(value)
     }
 
     end!();
@@ -639,15 +661,8 @@ where
         value: &U,
     ) -> Result<(), Self::Error> {
         if self.tag_written || !matches!(self.collection_type, CollectionType::Tag) {
-            #[cfg(feature = "std")]
-            if self.serializer.canonicalization.is_sorting() {
-                let value_bytes = to_vec_small(value, self.serializer.canonicalization)
-                    .map_err(|e| Error::Value(e.to_string()))?;
-                self.cache_values.push(value_bytes);
-                return Ok(());
-            }
-
-            return value.serialize(&mut *self.serializer);
+            // untagged tuples are CollectionType::Array to skip writing the tag header
+            return self.inline_serialize_value(value);
        }
 
         self.tag_written = true;
@@ -670,14 +685,7 @@ where
 
     #[inline]
     fn serialize_key<U: ?Sized + ser::Serialize>(&mut self, key: &U) -> Result<(), Self::Error> {
-        if self.serializer.canonicalization.is_sorting() {
-            let key_bytes = to_vec_small(key, self.serializer.canonicalization)
-                .map_err(|e| Error::Value(e.to_string()))?;
-            self.cache_keys.push(key_bytes);
-            return Ok(());
-        }
-
-        key.serialize(&mut *self.serializer)
+        self.inline_serialize_key(key)
     }
 
     #[inline]
@@ -685,15 +693,7 @@ where
         &mut self,
         value: &U,
     ) -> Result<(), Self::Error> {
-        #[cfg(feature = "std")]
-        if self.serializer.canonicalization.is_sorting() {
-            let value_bytes = to_vec_small(value, self.serializer.canonicalization)
-                .map_err(|e| Error::Value(e.to_string()))?;
-            self.cache_values.push(value_bytes);
-            return Ok(());
-        }
-
-        value.serialize(&mut *self.serializer)
+        self.inline_serialize_value(value)
     }
 
     end_map!();
@@ -712,19 +712,8 @@ where
         key: &'static str,
         value: &U,
     ) -> Result<(), Self::Error> {
-        #[cfg(feature = "std")]
-        if self.serializer.canonicalization.is_sorting() {
-            let key_bytes = to_vec_small(key, self.serializer.canonicalization)
-                .map_err(|e| Error::Value(e.to_string()))?;
-            self.cache_keys.push(key_bytes);
-            let value_bytes = to_vec_small(value, self.serializer.canonicalization)
-                .map_err(|e| Error::Value(e.to_string()))?;
-            self.cache_values.push(value_bytes);
-            return Ok(());
-        }
-
-        key.serialize(&mut *self.serializer)?;
-        value.serialize(&mut *self.serializer)?;
+        self.inline_serialize_key(key)?;
+        self.inline_serialize_value(value)?;
 
         Ok(())
     }
@@ -744,19 +733,8 @@ where
         key: &'static str,
         value: &U,
     ) -> Result<(), Self::Error> {
-        #[cfg(feature = "std")]
-        if self.serializer.canonicalization.is_sorting() {
-            let key_bytes = to_vec_small(key, self.serializer.canonicalization)
-                .map_err(|e| Error::Value(e.to_string()))?;
-            self.cache_keys.push(key_bytes);
-            let value_bytes = to_vec_small(value, self.serializer.canonicalization)
-                .map_err(|e| Error::Value(e.to_string()))?;
-            self.cache_values.push(value_bytes);
-            return Ok(());
-        }
-
-        key.serialize(&mut *self.serializer)?;
-        value.serialize(&mut *self.serializer)?;
+        self.inline_serialize_key(key)?;
+        self.inline_serialize_value(value)?;
 
         Ok(())
     }
@@ -768,14 +746,14 @@ where
 /// Uses a smaller Vec buffer, as we are expecting smaller keys/values to be serialized.
 ///
 /// We use a very small buffer (2 words) to ensure it's cheap to initialize the Vec. Often the keys
-/// and values may only be a couple bytes long such as with integer values. Some kind of type length
-/// hint could help in the future, or perhaps using a smallvec crate too.
+/// and values may only be a couple bytes long such as with integer values.
+#[cfg(feature = "canonical")]
 #[inline]
-pub fn to_vec_small<T: ?Sized + ser::Serialize>(
+fn to_vec_small<T: ?Sized + ser::Serialize>(
     value: &T,
-    canonicalization_scheme: CanonicalizationScheme,
+    canonicalization_scheme: Option<CanonicalizationScheme>,
 ) -> Result<Vec<u8>, Error<std::io::Error>> {
-    let mut buffer = std::vec::Vec::with_capacity(16);
+    let mut buffer = Vec::with_capacity(256);
     let mut serializer = Serializer::new(&mut buffer, canonicalization_scheme);
     value.serialize(&mut serializer)?;
     Ok(buffer)
 }
@@ -802,8 +780,8 @@
 #[cfg(feature = "std")]
 #[inline]
 pub fn to_vec<T: ?Sized + ser::Serialize>(value: &T) -> Result<Vec<u8>, Error<std::io::Error>> {
-    let mut buffer = std::vec::Vec::with_capacity(128);
-    let mut serializer = Serializer::new(&mut buffer, CanonicalizationScheme::None);
+    let mut buffer = Vec::with_capacity(1024);
+    let mut serializer = Serializer::new(&mut buffer, None);
     value.serialize(&mut serializer)?;
     Ok(buffer)
 }
@@ -823,17 +801,17 @@
-#[cfg(feature = "std")]
+#[cfg(feature = "canonical")]
 #[inline]
 pub fn to_vec_canonical<T: ?Sized + ser::Serialize>(
     value: &T,
-    scheme: CanonicalizationScheme,
+    scheme: Option<CanonicalizationScheme>,
 ) -> Result<Vec<u8>, Error<std::io::Error>> {
-    let mut buffer = std::vec::Vec::with_capacity(128);
+    let mut buffer = Vec::with_capacity(1024);
     let mut serializer = Serializer::new(&mut buffer, scheme);
     value.serialize(&mut serializer)?;
     Ok(buffer)
 }
@@ -890,16 +868,16 @@
 /// let example = Example { a: 42, aa: 420, b: 4200 };
 ///
 /// let mut bytes = Vec::new();
-/// into_writer_canonical(&example, &mut bytes, CanonicalizationScheme::Rfc8049).unwrap();
+/// into_writer_canonical(&example, &mut bytes, Some(CanonicalizationScheme::Rfc8049)).unwrap();
 ///
 /// assert_eq!(hex::encode(&bytes), "a36161182a61621910686261611901a4");
 /// ```
-#[cfg(feature = "std")]
+#[cfg(feature = "canonical")]
 #[inline]
 pub fn into_writer_canonical<T: ?Sized + ser::Serialize, W: Write>(
     value: &T,
     writer: W,
-    scheme: CanonicalizationScheme,
+    scheme: Option<CanonicalizationScheme>,
 ) -> Result<(), Error<W::Error>>
 where
     W::Error: core::fmt::Debug,
diff --git a/ciborium/tests/canonical.rs b/ciborium/tests/canonical.rs
index 5897827..30101cb 100644
--- a/ciborium/tests/canonical.rs
+++ b/ciborium/tests/canonical.rs
@@ -65,7 +65,7 @@ fn map_old() {
 
 /// Use length-first ordering for keys.
 #[test]
-#[cfg(feature = "std")]
+#[cfg(feature = "canonical")]
 fn map_rfc7049() {
     use ciborium::ser::CanonicalizationScheme;
 
@@ -79,10 +79,11 @@ fn map_rfc7049() {
     map.insert(cval!("z"), val!(4));
     map.insert(cval!("aa"), val!(6));
 
-    let bytes1 = ciborium::ser::to_vec_canonical(&map, CanonicalizationScheme::Rfc7049).unwrap();
+    let bytes1 =
+        ciborium::ser::to_vec_canonical(&map, Some(CanonicalizationScheme::Rfc7049)).unwrap();
 
     assert_eq!(
-        hex::encode(&bytes1),
+        hex::encode(bytes1),
         "a80a002001f402186403617a048120056261610681186407"
     );
 }
@@ -93,7 +94,7 @@ fn map_rfc7049() {
 ///
 /// [RFC 8949]: https://www.rfc-editor.org/rfc/rfc8949.html#name-core-deterministic-encoding
 #[test]
-#[cfg(feature = "std")]
+#[cfg(feature = "canonical")]
 fn map_rfc8949() {
     use ciborium::ser::CanonicalizationScheme;
 
@@ -107,10 +108,11 @@ fn map_rfc8949() {
     map.insert(cval!("z"), val!(4));
     map.insert(cval!("aa"), val!(6));
 
-    let bytes1 = ciborium::ser::to_vec_canonical(&map, CanonicalizationScheme::Rfc8049).unwrap();
+    let bytes1 =
+        ciborium::ser::to_vec_canonical(&map, Some(CanonicalizationScheme::Rfc8049)).unwrap();
 
     assert_eq!(
-        hex::encode(&bytes1),
+        hex::encode(bytes1),
         "a80a001864032001617a046261610681186407812005f402"
     );
 }
diff --git a/ciborium/tests/codec.rs b/ciborium/tests/codec.rs
index 560762b..bb66364 100644
--- a/ciborium/tests/codec.rs
+++ b/ciborium/tests/codec.rs
@@ -296,7 +296,7 @@ fn codec<'de, T: Serialize + Clone, V: Debug + PartialEq + DeserializeOwned, F:
     assert_eq!(bytes, encoded);
 
     let mut encoded = Vec::new();
-    into_writer(&value, &mut encoded).unwrap();
+    into_writer(&input, &mut encoded).unwrap();
     eprintln!("{:x?} == {:x?}", bytes, encoded);
     assert_eq!(bytes, encoded);
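
---

For review convenience, here is a minimal end-to-end sketch of the API surface this patch settles on. It assumes a consumer crate with the `canonical` feature enabled, plus `serde` (with `derive`) and `hex` as dependencies; the struct shape and both expected hex strings are taken directly from the doc examples in this diff.

```rust
use ciborium::ser::{to_vec, to_vec_canonical, CanonicalizationScheme};
use serde::Serialize;

#[derive(Serialize)]
struct Example {
    a: u64,
    aa: u64,
    b: u64,
}

fn main() {
    let example = Example { a: 42, aa: 420, b: 4200 };

    // No scheme: struct fields are emitted in declaration order (a, aa, b).
    let plain = to_vec(&example).unwrap();
    assert_eq!(hex::encode(&plain), "a36161182a6261611901a46162191068");

    // RFC 8949 deterministic encoding: the *serialized* keys sort bytewise.
    // Because the head byte carries the length, "b" (0x6162) sorts before
    // "aa" (0x626161), giving the order a, b, aa.
    let canonical =
        to_vec_canonical(&example, Some(CanonicalizationScheme::Rfc8049)).unwrap();
    assert_eq!(hex::encode(&canonical), "a36161182a61621910686261611901a4");
}
```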
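One behavioral consequence of the `end!`/`end_map!` rewrite worth calling out: with `None`, a map serialized without a length hint keeps the old path (indefinite-length header terminated by `Header::Break`), while any `Some(scheme)` buffers entries so a definite-length header can always be written up front. The sketch below illustrates that difference; `NoLenMap` and its expected byte strings are hypothetical, derived by hand from the logic above rather than taken from the patch's test vectors.

```rust
use ciborium::ser::{to_vec, to_vec_canonical, CanonicalizationScheme};
use serde::ser::{Serialize, SerializeMap, Serializer};

// Hypothetical type that serializes as a map *without* a length hint,
// forcing the indefinite-length path in the non-canonical encoder.
struct NoLenMap;

impl Serialize for NoLenMap {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let mut map = serializer.serialize_map(None)?; // no length hint
        map.serialize_entry("b", &1u8)?;
        map.serialize_entry("a", &2u8)?;
        map.end()
    }
}

fn main() {
    // Non-canonical: indefinite-length header (0xbf), entries in insertion
    // order, terminated by a break (0xff).
    let plain = to_vec(&NoLenMap).unwrap();
    assert_eq!(hex::encode(&plain), "bf616201616102ff");

    // Canonical: entries are cached, so a definite-length header (0xa2) is
    // written up front and keys are sorted ("a" before "b").
    let canonical =
        to_vec_canonical(&NoLenMap, Some(CanonicalizationScheme::Rfc8049)).unwrap();
    assert_eq!(hex::encode(&canonical), "a2616102616201");
}
```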