From 83dd4aa88c503871524840ab2a5209ceddd92c9f Mon Sep 17 00:00:00 2001 From: Liam Gray Date: Tue, 26 Nov 2024 19:51:18 +0000 Subject: [PATCH] CanonicalizationScheme enum type --- ciborium/src/ser/mod.rs | 101 +++++++++++++++++++++++++----------- ciborium/tests/canonical.rs | 3 +- 2 files changed, 72 insertions(+), 32 deletions(-) diff --git a/ciborium/src/ser/mod.rs b/ciborium/src/ser/mod.rs index dcdca1e..89766e1 100644 --- a/ciborium/src/ser/mod.rs +++ b/ciborium/src/ser/mod.rs @@ -11,6 +11,40 @@ use ciborium_io::Write; use ciborium_ll::*; use serde::{ser, Serialize as _}; +/// Which canonicalization scheme to use for CBOR serialization. +#[cfg(feature = "std")] +pub enum CanonicalizationScheme { + /// No canonicalization, don't sort map keys. Faster and reduces allocations. + None, + + /// Sort map keys in output according to [RFC 7049]'s deterministic encoding spec. + /// + /// Also aligns with [RFC 8949 4.2.3]'s backwards compatibility sort order. + /// + /// Uses length-first map key ordering. E.g. `["a", "b", "aa"]`. + Rfc7049, + + /// Sort map keys in output according to [RFC 8949]'s deterministic encoding spec. + /// + /// Uses bytewise lexicographic map key ordering. E.g. `["a", "aa", "b"]`. + Rfc8049, +} + +#[cfg(feature = "std")] +impl CanonicalizationScheme { + /// Returns `true` if this canonicalization scheme requires map keys to be sorted. + pub fn is_sorting(&self) -> bool { + matches!(self, Self::Rfc7049 | Self::Rfc8049) + } + + // pub fn key(&self, key: &K) -> Result, Error> { + // let mut buffer = Vec::new(); + // let mut serializer = Serializer::new(&mut buffer, true); + // key.serialize(&mut serializer)?; + // Ok(buffer) + // } +} + /// A serializer for CBOR. 
pub struct Serializer { encoder: Encoder, @@ -20,7 +54,7 @@ pub struct Serializer { /// /// [RFC 8949]: https://www.rfc-editor.org/rfc/rfc8949.html#name-deterministically-encoded-c #[cfg(feature = "std")] - canonical: bool, + canonicalization: CanonicalizationScheme, } impl Serializer { @@ -31,10 +65,10 @@ impl Serializer { /// /// [RFC 8949]: https://www.rfc-editor.org/rfc/rfc8949.html#name-deterministically-encoded-c #[cfg(feature = "std")] - pub fn new(encoder: impl Into>, canonical: bool) -> Self { + pub fn new(encoder: impl Into>, canonicalization: CanonicalizationScheme) -> Self { Self { encoder: encoder.into(), - canonical + canonicalization } } } @@ -45,7 +79,7 @@ impl From for Serializer { Self { encoder: writer.into(), #[cfg(feature = "std")] - canonical: false, + canonicalization: CanonicalizationScheme::None, } } } @@ -56,7 +90,7 @@ impl From> for Serializer { Self { encoder: writer, #[cfg(feature = "std")] - canonical: false, + canonicalization: CanonicalizationScheme::None, } } } @@ -335,7 +369,7 @@ macro_rules! end { () => { #[inline] fn end(self) -> Result<(), Self::Error> { - if self.ending { + if self.indefinite { self.serializer.encoder.push(Header::Break)?; } @@ -350,27 +384,30 @@ macro_rules! 
end_map { #[inline] fn end(self) -> Result<(), Self::Error> { #[cfg(feature = "std")] - if self.serializer.canonical { + if matches!(self.serializer.canonicalization, CanonicalizationScheme::Rfc8049) { // keys get sorted in lexicographical byte order let keys = self.cache_keys; let values = self.cache_values; debug_assert_eq!( keys.len(), values.len(), - "ciborium error: canonicalization failed, keys and values must have same length."); + "ciborium error: canonicalization failed, different number of keys and values?"); - let mut pairs = std::collections::BTreeMap::new(); - for (key, value) in keys.iter().zip(values.iter()) { - pairs.insert(key, value); - } + let pairs: std::collections::BTreeMap<_, _> = + keys.iter().zip(values.iter()).collect(); - for (key, value) in pairs { + for (key, value) in pairs.iter() { self.serializer.encoder.write_all(&key)?; self.serializer.encoder.write_all(&value)?; } } - if self.ending { + #[cfg(feature = "std")] + if matches!(self.serializer.canonicalization, CanonicalizationScheme::Rfc7049) { + unimplemented!("rfc7049 canonicalization not yet implemented"); + } + + if self.indefinite { self.serializer.encoder.push(Header::Break)?; } @@ -385,7 +422,9 @@ macro_rules! end_map { #[doc(hidden)] pub struct CollectionSerializer<'a, W> { serializer: &'a mut Serializer, - ending: bool, + + /// Whether the collection is indefinite length. Cannot be used with canonical serialization. 
+ indefinite: bool, tag: bool, #[cfg(feature = "std")] @@ -395,21 +434,20 @@ pub struct CollectionSerializer<'a, W> { } impl<'a, W> CollectionSerializer<'a, W> { - pub fn new(serializer: &'a mut Serializer, ending: bool, tag: bool) -> Self { + pub fn new(serializer: &'a mut Serializer, indefinite: bool, tag: bool) -> Self { #[cfg(feature = "std")] - let capacity = match serializer.canonical { - true => 4, - false => 0, - }; + assert!( + !(serializer.canonicalization.is_sorting() && indefinite), + "ciborium error: canonical mode cannot be used with indefinite length collections"); Self { serializer, - ending, + indefinite, tag, #[cfg(feature = "std")] - cache_keys: Vec::with_capacity(capacity), + cache_keys: Vec::with_capacity(0), #[cfg(feature = "std")] - cache_values: Vec::with_capacity(capacity), + cache_values: Vec::with_capacity(0), } } } @@ -504,7 +542,7 @@ where #[inline] fn serialize_key(&mut self, key: &U) -> Result<(), Self::Error> { #[cfg(feature = "std")] - if self.serializer.canonical { + if self.serializer.canonicalization.is_sorting() { let key_bytes = to_vec(key).map_err(|e| Error::Value(e.to_string()))?; self.cache_keys.push(key_bytes); return Ok(()); @@ -519,7 +557,7 @@ where value: &U, ) -> Result<(), Self::Error> { #[cfg(feature = "std")] - if self.serializer.canonical { + if self.serializer.canonicalization.is_sorting() { let value_bytes = to_vec(value).map_err(|e| Error::Value(e.to_string()))?; self.cache_values.push(value_bytes); return Ok(()); @@ -545,7 +583,7 @@ where value: &U, ) -> Result<(), Self::Error> { #[cfg(feature = "std")] - if self.serializer.canonical { + if self.serializer.canonicalization.is_sorting() { let key_bytes = to_vec(key).map_err(|e| Error::Value(e.to_string()))?; self.cache_keys.push(key_bytes); let value_bytes = to_vec(value).map_err(|e| Error::Value(e.to_string()))?; @@ -575,7 +613,7 @@ where value: &U, ) -> Result<(), Self::Error> { #[cfg(feature = "std")] - if self.serializer.canonical { + if 
self.serializer.canonicalization.is_sorting() { let key_bytes = to_vec(key).map_err(|e| Error::Value(e.to_string()))?; self.cache_keys.push(key_bytes); let value_bytes = to_vec(value).map_err(|e| Error::Value(e.to_string()))?; @@ -595,7 +633,7 @@ where #[inline] pub fn to_vec(value: &T) -> Result, Error> { let mut buffer = std::vec::Vec::with_capacity(1024); - let mut serializer = Serializer::new(&mut buffer, false); + let mut serializer = Serializer::new(&mut buffer, CanonicalizationScheme::None); value.serialize(&mut serializer)?; Ok(buffer) } @@ -607,9 +645,9 @@ pub fn to_vec(value: &T) -> Result, Error(value: &T) -> Result, Error> { +pub fn to_vec_canonical(value: &T, scheme: CanonicalizationScheme) -> Result, Error> { let mut buffer = std::vec::Vec::with_capacity(1024); - let mut serializer = Serializer::new(&mut buffer, true); + let mut serializer = Serializer::new(&mut buffer, scheme); value.serialize(&mut serializer)?; Ok(buffer) } @@ -637,10 +675,11 @@ where pub fn into_writer_canonical( value: &T, writer: W, + scheme: CanonicalizationScheme, ) -> Result<(), Error> where W::Error: core::fmt::Debug, { - let mut encoder = Serializer::new(writer, true); + let mut encoder = Serializer::new(writer, scheme); value.serialize(&mut encoder) } \ No newline at end of file diff --git a/ciborium/tests/canonical.rs b/ciborium/tests/canonical.rs index 15d1438..ed3786b 100644 --- a/ciborium/tests/canonical.rs +++ b/ciborium/tests/canonical.rs @@ -7,6 +7,7 @@ use ciborium::tag::Required; use ciborium::value::CanonicalValue; use rand::prelude::*; use std::collections::BTreeMap; +use ciborium::ser::CanonicalizationScheme; macro_rules! cval { ($x:expr) => { @@ -81,7 +82,7 @@ fn map_canonical() { map.insert(cval!("z"), val!(4)); map.insert(cval!("aa"), val!(6)); - let bytes1 = ciborium::ser::to_vec_canonical(&map).unwrap(); + let bytes1 = ciborium::ser::to_vec_canonical(&map, CanonicalizationScheme::Rfc8049).unwrap(); assert_eq!( hex::encode(&bytes1),