Skip to content

Commit

Permalink
feat: introduce no-cid-as-bytes feature (#23)
Browse files Browse the repository at this point in the history
This commit changes the default behaviour of the deserializer. It's now
possible to deserialize CIDs into bytes. This is needed in order to have
the Serde derives for internally tagged and untagged enums working correctly.

If you need the current behaviour of erroring at run-time in case a CID
is tried to be deserialized as bytes, you can enable the `no-cid-as-bytes`
feature.

BREAKING CHANGE: CIDs may be deserialized into bytes. In order to restore
the original behaviour, enable the `no-cid-as-bytes` feature.

---------

Co-authored-by: sugyan <[email protected]>
Co-authored-by: Rod Vagg <[email protected]>
  • Loading branch information
3 people authored Feb 23, 2024
1 parent 8311876 commit 2afcb96
Show file tree
Hide file tree
Showing 6 changed files with 251 additions and 1 deletion.
6 changes: 5 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,9 @@ jobs:
shell: bash

- name: Test
run: cargo test --workspace
run: cargo test --all-targets --workspace
shell: bash

- name: Test no-cid-as-bytes feature
run: cargo test --all-targets --workspace --features no-cid-as-bytes
shell: bash
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@ serde_bytes = { version = "0.11.9", default-features = false, features = ["alloc
[features]
default = ["std"]
std = ["cbor4ii/use_std", "cid/std", "serde/std", "serde_bytes/std"]
# Prevent deserializing CIDs as bytes as much as possible.
no-cid-as-bytes = []
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,16 @@ fn main() -> Result<(), Box<dyn Error>> {
```


Features
--------

### `no-cid-as-bytes`

Sometimes it is desired that a CID is not accidentally deserialized into bytes. This can happen because the intermediate serde data model does not retain enough information to be able to differentiate between a bytes container and a CID container when there is a conflicting choice to be made, as in the case of some enum cases. The `no-cid-as-bytes` feature can be enabled in order to error at runtime in such cases.

The problem with that feature is that it breaks Serde's derive attributes for [internally tagged enums](https://serde.rs/enum-representations.html#internally-tagged) (`#[serde(tag = "sometag")]`) and [untagged enums](https://serde.rs/enum-representations.html#untagged) (`#[serde(untagged)]`). If this feature is enabled and you still need similar functionality, you can implement a deserializer manually. Examples of how to do that are in the [enum example](examples/enums.rs).


License
-------

Expand Down
138 changes: 138 additions & 0 deletions examples/enums.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/// Serde untagged (`#[serde(untagged)]`) and internally tagged enums (`#[serde(tag = "tag")]`)
/// cannot be derived for types containing CIDs when the `no-cid-as-bytes` feature is enabled.
/// Here examples are provided on how to implement similar behaviour manually. This file also
/// contains an example for a kinded enum.
use std::convert::{TryFrom, TryInto};

use cid::Cid;
use libipld_core::ipld::Ipld;
use serde::{de, Deserialize};
use serde_bytes::ByteBuf;
use serde_ipld_dagcbor::from_slice;

/// The CID `bafkreibme22gw2h7y2h7tg2fhqotaqjucnbc24deqo72b6mkl2egezxhvy` encoded as CBOR:
/// 42(h'00015512202C26B46B68FFC68FF99B453C1D30413413422D706483BFA0F98A5E886266E7AE')
///
/// Byte layout of the 41 bytes:
/// - `0xd8 0x2a`: CBOR tag 42, which marks the following byte string as a CID.
/// - `0x58 0x25`: header of a byte string with a one byte length, here 0x25 = 37 bytes.
/// - the remaining 37 bytes: the payload, a leading `0x00` followed by the binary CID.
const CBOR_CID_FIXTURE: [u8; 41] = [
0xd8, 0x2a, 0x58, 0x25, 0x00, 0x01, 0x55, 0x12, 0x20, 0x2c, 0x26, 0xb4, 0x6b, 0x68, 0xff, 0xc6,
0x8f, 0xf9, 0x9b, 0x45, 0x3c, 0x1d, 0x30, 0x41, 0x34, 0x13, 0x42, 0x2d, 0x70, 0x64, 0x83, 0xbf,
0xa0, 0xf9, 0x8a, 0x5e, 0x88, 0x62, 0x66, 0xe7, 0xae,
];

/// This enum shows how an internally tagged enum could be implemented.
#[derive(Debug, PartialEq)]
enum CidInInternallyTaggedEnum {
MyCid { cid: Cid },
}

// This manual deserializer implementation works as if you would derive `Deserialize` and add
// `#[serde(tag = "type")]` to the `CidInternallyTaggedEnum` enum.
impl<'de> de::Deserialize<'de> for CidInInternallyTaggedEnum {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
#[derive(Deserialize)]
struct Tagged {
r#type: String,
cid: Cid,
}

let Tagged { r#type, cid } = Deserialize::deserialize(deserializer)?;
if r#type == "MyCid" {
Ok(CidInInternallyTaggedEnum::MyCid { cid })
} else {
Err(de::Error::custom("No matching enum variant found"))
}
}
}

/// This enum shows how an untagged enum could be implemented.
#[derive(Debug, PartialEq)]
enum CidInUntaggedEnum {
MyCid(Cid),
}

// This manual deserializer implementation works as if you would derive `Deserialize` and add
// `#[serde(untagged)]`.
impl<'de> de::Deserialize<'de> for CidInUntaggedEnum {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
Cid::deserialize(deserializer)
.map(CidInUntaggedEnum::MyCid)
.map_err(|_| de::Error::custom("No matching enum variant found"))
}
}

/// Demonstrates a kinded enum: the variant is chosen by the kind of the decoded IPLD value.
#[derive(Debug, PartialEq)]
pub enum Kinded {
    Bytes(ByteBuf),
    Link(Cid),
}

/// Converts an [`Ipld`] value into the matching [`Kinded`] variant.
///
/// Only the bytes and link kinds are accepted; every other kind yields `Err(())`.
impl TryFrom<Ipld> for Kinded {
    type Error = ();

    fn try_from(ipld: Ipld) -> Result<Self, Self::Error> {
        let kinded = match ipld {
            Ipld::Bytes(raw) => Kinded::Bytes(ByteBuf::from(raw)),
            Ipld::Link(link) => Kinded::Link(link),
            _ => return Err(()),
        };
        Ok(kinded)
    }
}

impl<'de> de::Deserialize<'de> for Kinded {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
Ipld::deserialize(deserializer).and_then(|ipld| {
ipld.try_into()
.map_err(|_| de::Error::custom("No matching enum variant found"))
})
}
}

/// Decodes the CID fixture through each of the manually implemented enum deserializers and
/// checks the results.
pub fn main() {
    let cid: Cid = from_slice(&CBOR_CID_FIXTURE).unwrap();

    // {"type": "MyCid", "cid": 42(h'00015512202C26B46B68FFC68FF99B453C1D30413413422D706483BFA0F98A5E886266E7AE')}
    // The map header and both keys come first; the CID fixture is appended as the value of `cid`.
    let mut cbor_internally_tagged_enum: Vec<u8> = vec![
        0xa2, 0x64, 0x74, 0x79, 0x70, 0x65, 0x65, 0x4d, 0x79, 0x43, 0x69, 0x64, 0x63, 0x63, 0x69,
        0x64,
    ];
    cbor_internally_tagged_enum.extend_from_slice(&CBOR_CID_FIXTURE);

    let internally_tagged: CidInInternallyTaggedEnum =
        from_slice(&cbor_internally_tagged_enum).unwrap();
    assert_eq!(internally_tagged, CidInInternallyTaggedEnum::MyCid { cid });

    let untagged: CidInUntaggedEnum = from_slice(&CBOR_CID_FIXTURE).unwrap();
    assert_eq!(untagged, CidInUntaggedEnum::MyCid(cid));

    let link: Kinded = from_slice(&CBOR_CID_FIXTURE).unwrap();
    assert_eq!(link, Kinded::Link(cid));

    // Without the two leading tag 42 bytes the fixture decodes as a plain byte string.
    let cbor_bytes = &CBOR_CID_FIXTURE[2..];
    let decoded_bytes: Kinded = from_slice(cbor_bytes).unwrap();
    // The decoded value holds only the payload, i.e. neither the tag nor the two bytes carrying
    // the byte string type identifier and its length.
    let payload = CBOR_CID_FIXTURE[4..].to_vec();
    assert_eq!(decoded_bytes, Kinded::Bytes(ByteBuf::from(payload)));
}

// Lets `cargo test --all-targets` execute the example's assertions as a regular test.
#[test]
fn test_main() {
    main();
}
5 changes: 5 additions & 0 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,11 @@ struct CidDeserializer<'a, R>(&'a mut Deserializer<R>);
impl<'de, 'a, R: dec::Read<'de>> de::Deserializer<'de> for &'a mut CidDeserializer<'a, R> {
type Error = DecodeError<R::Error>;

#[cfg(not(feature = "no-cid-as-bytes"))]
fn deserialize_any<V: de::Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
self.deserialize_bytes(visitor)
}
#[cfg(feature = "no-cid-as-bytes")]
fn deserialize_any<V: de::Visitor<'de>>(self, _visitor: V) -> Result<V::Value, Self::Error> {
Err(de::Error::custom(
"Only bytes can be deserialized into a CID",
Expand Down
91 changes: 91 additions & 0 deletions tests/cid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,32 @@ fn test_cid_not_as_bytes() {
.expect_err("shouldn't have parsed a tagged CID as a byte array");
from_slice::<serde_bytes::ByteBuf>(&cbor_cid[2..])
.expect("should have parsed an untagged CID as a byte array");

#[derive(Debug, Deserialize, PartialEq)]
struct NewType(ByteBuf);

#[derive(Debug, Deserialize, PartialEq)]
#[serde(untagged)]
enum BytesInEnum {
MyCid(NewType),
}

// With the `no-cid-as-bytes` feature enabled, we make sure that it will error, when we try to
// decode a CID as bytes.
#[cfg(feature = "no-cid-as-bytes")]
from_slice::<BytesInEnum>(&cbor_cid)
.expect_err("shouldn't have parsed a tagged CID as byte array");

// With that feature disabled, then it will decode the CID (without the TAG and the zero
// prefix) as bytes.
#[cfg(not(feature = "no-cid-as-bytes"))]
{
let cid_without_tag = &cbor_cid[5..];
assert_eq!(
from_slice::<BytesInEnum>(&cbor_cid).unwrap(),
BytesInEnum::MyCid(NewType(ByteBuf::from(cid_without_tag)))
);
}
}

/// Test whether a binary CID could be serialized if it isn't prefixed by tag 42. It should fail.
Expand Down Expand Up @@ -219,6 +245,71 @@ fn test_cid_in_kinded_enum_with_newtype() {
assert!(decoded_random_bytes.is_err());
}

/// Checks how a CID decodes inside externally tagged, internally tagged and untagged enums that
/// use Serde's derived `Deserialize` implementations.
///
/// Externally tagged enums always work. Internally tagged and untagged enums only work when the
/// `no-cid-as-bytes` feature is disabled.
#[test]
fn test_cid_in_tagged_enum() {
    #[derive(Debug, Deserialize, PartialEq)]
    pub enum Externally {
        Cid(Cid),
    }

    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(tag = "type")]
    pub enum Internally {
        Cid { cid: Cid },
    }

    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(untagged)]
    pub enum Untagged {
        Cid(Cid),
    }

    let cbor_cid = [
        0xd8, 0x2a, 0x58, 0x25, 0x00, 0x01, 0x55, 0x12, 0x20, 0x2c, 0x26, 0xb4, 0x6b, 0x68, 0xff,
        0xc6, 0x8f, 0xf9, 0x9b, 0x45, 0x3c, 0x1d, 0x30, 0x41, 0x34, 0x13, 0x42, 0x2d, 0x70, 0x64,
        0x83, 0xbf, 0xa0, 0xf9, 0x8a, 0x5e, 0x88, 0x62, 0x66, 0xe7, 0xae,
    ];

    // {"Cid": cid}
    let cbor_map1 = [vec![0xa1, 0x63, 0x43, 0x69, 0x64], Vec::from(cbor_cid)].concat();

    // {"type": "Cid", "cid": cid}
    let cbor_map2 = [
        vec![
            0xa2, 0x64, 0x74, 0x79, 0x70, 0x65, 0x63, 0x43, 0x69, 0x64, 0x63, 0x63, 0x69, 0x64,
        ],
        Vec::from(cbor_cid),
    ]
    .concat();

    // Skip the tag (2 bytes), the byte string header (2 bytes) and the leading zero byte to get
    // the binary CID itself.
    let cid = Cid::try_from(&cbor_cid[5..]).unwrap();

    let decoded: Externally = from_slice(&cbor_map1).unwrap();
    assert_eq!(decoded, Externally::Cid(cid));

    // With the `no-cid-as-bytes` feature enabled, it's not possible to use internally tagged or
    // untagged enums. This behaviour is *not* intentional, but incidental due to how Serde works
    // internally. This test is only added to see what one could expect, and to get notified in
    // case it ever gets supported.
    #[cfg(feature = "no-cid-as-bytes")]
    {
        from_slice::<Internally>(&cbor_map2)
            .expect_err("shouldn't be able to decode the internally tagged enum");
        from_slice::<Untagged>(&cbor_cid)
            .expect_err("shouldn't be able to decode the untagged enum");
    }

    // With that feature disabled, decoding succeeds, which is the desired behaviour.
    #[cfg(not(feature = "no-cid-as-bytes"))]
    {
        let decoded: Internally = from_slice(&cbor_map2).unwrap();
        assert_eq!(decoded, Internally::Cid { cid });

        let decoded: Untagged = from_slice(&cbor_cid).unwrap();
        assert_eq!(decoded, Untagged::Cid(cid));
    }
}

#[test]
fn test_cid_empty_errors() {
// Tag 42 with zero bytes
Expand Down

0 comments on commit 2afcb96

Please sign in to comment.