Skip to content

Commit

Permalink
fix: make sure that structs are serialized correctly (#34)
Browse files Browse the repository at this point in the history
Rust structs have their entries ordered the same way as they are
defined. When serialized to CBOR, they become maps by default. In
 DAG-CBOR the maps need to have a specific order, hence the keys
might need to be re-ordered, so that they are independent of the
order they were defined. This commit makes sure that it's actually
the case.

Fixes #31.
  • Loading branch information
vmx authored Apr 24, 2024
1 parent ea977f2 commit 3464937
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 35 deletions.
93 changes: 59 additions & 34 deletions src/ser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ impl<'a, W: enc::Write> serde::Serializer for &'a mut Serializer<W> {
type SerializeTupleStruct = BoundedCollect<'a, W>;
type SerializeTupleVariant = BoundedCollect<'a, W>;
type SerializeMap = CollectMap<'a, W>;
type SerializeStruct = BoundedCollect<'a, W>;
type SerializeStructVariant = BoundedCollect<'a, W>;
type SerializeStruct = CollectMap<'a, W>;
type SerializeStructVariant = CollectMap<'a, W>;

#[inline]
fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> {
Expand Down Expand Up @@ -265,7 +265,7 @@ impl<'a, W: enc::Write> serde::Serializer for &'a mut Serializer<W> {
len: usize,
) -> Result<Self::SerializeStruct, Self::Error> {
enc::MapStartBounded(len).encode(&mut self.writer)?;
Ok(BoundedCollect { ser: self })
Ok(CollectMap::new(self))
}

#[inline]
Expand All @@ -279,7 +279,7 @@ impl<'a, W: enc::Write> serde::Serializer for &'a mut Serializer<W> {
enc::MapStartBounded(1).encode(&mut self.writer)?;
variant.encode(&mut self.writer)?;
enc::MapStartBounded(len).encode(&mut self.writer)?;
Ok(BoundedCollect { ser: self })
Ok(CollectMap::new(self))
}

#[inline]
Expand Down Expand Up @@ -429,14 +429,51 @@ pub struct CollectMap<'a, W> {
ser: &'a mut Serializer<W>,
}

impl<'a, W> CollectMap<'a, W> {
impl<'a, W> CollectMap<'a, W>
where
W: enc::Write,
{
fn new(ser: &'a mut Serializer<W>) -> Self {
Self {
buffer: BufWriter::new(Vec::new()),
entries: Vec::new(),
ser,
}
}

fn serialize<T: Serialize + ?Sized>(
&mut self,
maybe_key: Option<&'static str>,
value: &T,
) -> Result<(), EncodeError<W::Error>> {
// Instantiate a new serializer, so that the buffer can be re-used.
let mut mem_serializer = Serializer::new(&mut self.buffer);
if let Some(key) = maybe_key {
key.serialize(&mut mem_serializer)
.map_err(|_| EncodeError::Msg("Struct key cannot be serialized.".to_string()))?;
}
value
.serialize(&mut mem_serializer)
.map_err(|_| EncodeError::Msg("Struct value cannot be serialized.".to_string()))?;

self.entries.push(self.buffer.buffer().to_vec());
self.buffer.clear();

Ok(())
}

fn end(mut self) -> Result<(), EncodeError<W::Error>> {
// This sorting step makes sure we have the expected order of the keys. Byte-wise
// comparison over the encoded forms gives us the right order as keys in DAG-CBOR are
// always (text) strings, hence have the same CBOR major type 3. The length of the string
// is encoded in the prefix bits along with the major type. This means that a shorter string
// always sorts before a longer string even with the compact length representation.
self.entries.sort_unstable();
for entry in self.entries {
self.ser.writer.push(&entry)?;
}
Ok(())
}
}

impl<W> serde::ser::SerializeMap for CollectMap<'_, W>
Expand All @@ -448,7 +485,8 @@ where

#[inline]
fn serialize_key<T: Serialize + ?Sized>(&mut self, key: &T) -> Result<(), Self::Error> {
// Instantiate a new serializer, so that the buffer can be re-used.
// The key needs to be add to the buffer without any further operations. Serializing the
// value will then do the necessary flushing etc.
let mut mem_serializer = Serializer::new(&mut self.buffer);
key.serialize(&mut mem_serializer)
.map_err(|_| EncodeError::Msg("Map key cannot be serialized.".to_string()))?;
Expand All @@ -457,34 +495,20 @@ where

#[inline]
fn serialize_value<T: Serialize + ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> {
// Instantiate a new serializer, so that the buffer can be re-used.
let mut mem_serializer = Serializer::new(&mut self.buffer);
value
.serialize(&mut mem_serializer)
.map_err(|_| EncodeError::Msg("Map value cannot be serialized.".to_string()))?;

self.entries.push(self.buffer.buffer().to_vec());
self.buffer.clear();

Ok(())
self.serialize(None, value)
}

#[inline]
fn end(mut self) -> Result<Self::Ok, Self::Error> {
fn end(self) -> Result<Self::Ok, Self::Error> {
enc::MapStartBounded(self.entries.len()).encode(&mut self.ser.writer)?;
// This sorting step makes sure we have the expected order of the keys. Byte-wise
// comparison gives us the right order as keys in DAG-CBOR are always (text) strings, hence
// have the same CBOR major type 3. The length of the string is encoded in the following
// bits. This means that a shorter string sorts before a longer string.
self.entries.sort_unstable();
for entry in self.entries {
self.ser.writer.push(&entry)?;
}
Ok(())
self.end()
}
}

impl<W: enc::Write> serde::ser::SerializeStruct for BoundedCollect<'_, W> {
impl<W> serde::ser::SerializeStruct for CollectMap<'_, W>
where
W: enc::Write,
{
type Ok = ();
type Error = EncodeError<W::Error>;

Expand All @@ -494,17 +518,19 @@ impl<W: enc::Write> serde::ser::SerializeStruct for BoundedCollect<'_, W> {
key: &'static str,
value: &T,
) -> Result<(), Self::Error> {
key.serialize(&mut *self.ser)?;
value.serialize(&mut *self.ser)
self.serialize(Some(key), value)
}

#[inline]
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
self.end()
}
}

impl<W: enc::Write> serde::ser::SerializeStructVariant for BoundedCollect<'_, W> {
impl<W> serde::ser::SerializeStructVariant for CollectMap<'_, W>
where
W: enc::Write,
{
type Ok = ();
type Error = EncodeError<W::Error>;

Expand All @@ -514,13 +540,12 @@ impl<W: enc::Write> serde::ser::SerializeStructVariant for BoundedCollect<'_, W>
key: &'static str,
value: &T,
) -> Result<(), Self::Error> {
key.serialize(&mut *self.ser)?;
value.serialize(&mut *self.ser)
self.serialize(Some(key), value)
}

#[inline]
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
self.end()
}
}

Expand Down
53 changes: 53 additions & 0 deletions tests/ser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::{collections::BTreeMap, iter};

use serde::de::value::{self, MapDeserializer, SeqDeserializer};
use serde_bytes::{ByteBuf, Bytes};
use serde_derive::Serialize;
use serde_ipld_dagcbor::{
from_slice,
ser::{BufWriter, Serializer},
Expand Down Expand Up @@ -190,3 +191,55 @@ fn test_non_unbound_list() {
let result = serializer.into_inner().into_inner();
assert_eq!(result, expected);
}

#[test]
fn test_struct_canonical() {
#[derive(Serialize)]
struct First {
a: u32,
b: u32,
}
#[derive(Serialize)]
struct Second {
b: u32,
a: u32,
}

let first = First { a: 1, b: 2 };
let second = Second { a: 1, b: 2 };

let first_bytes = serde_ipld_dagcbor::to_vec(&first).unwrap();
let second_bytes = serde_ipld_dagcbor::to_vec(&second).unwrap();

assert_eq!(first_bytes, second_bytes);
// Do not only make sure that the order is the same, but also that it's correct.
assert_eq!(first_bytes, b"\xa2\x61a\x01\x61b\x02")
}

#[test]
fn test_struct_variant_canonical() {
// The `abc` is there to make sure it really follows the DAG-CBOR sorting order, which sorts by
// length of the keys first, then lexicographically. It means that `abc` sorts *after* `b`.
#[derive(Serialize)]
enum First {
Data { a: u8, b: u8, abc: u8 },
}

#[derive(Serialize)]
enum Second {
Data { b: u8, abc: u8, a: u8 },
}

let first = First::Data { a: 1, b: 2, abc: 3 };
let second = Second::Data { a: 1, b: 2, abc: 3 };

let first_bytes = serde_ipld_dagcbor::to_vec(&first).unwrap();
let second_bytes = serde_ipld_dagcbor::to_vec(&second).unwrap();

assert_eq!(first_bytes, second_bytes);
// Do not only make sure that the order is the same, but also that it's correct.
assert_eq!(
first_bytes,
b"\xa1\x64Data\xa3\x61a\x01\x61b\x02\x63abc\x03"
)
}
2 changes: 1 addition & 1 deletion tests/std_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ testcase!(test_person_struct,
year_of_birth: 1906,
profession: Some("computer scientist".to_string()),
},
"a3646e616d656c477261636520486f707065726d796561725f6f665f62697274681907726a70726f66657373696f6e72636f6d707574657220736369656e74697374");
"a3646e616d656c477261636520486f707065726a70726f66657373696f6e72636f6d707574657220736369656e746973746d796561725f6f665f6269727468190772");

#[derive(Debug, PartialEq, Deserialize, Serialize)]
struct OptionalPerson {
Expand Down

0 comments on commit 3464937

Please sign in to comment.