Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: introduce no-cid-as-bytes feature #23

Merged
merged 4 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,9 @@ jobs:
shell: bash

- name: Test
run: cargo test --workspace
run: cargo test --all-targets --workspace
shell: bash

- name: Test no-cid-as-bytes feature
run: cargo test --all-targets --workspace --features no-cid-as-bytes
shell: bash
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@ serde_bytes = { version = "0.11.9", default-features = false, features = ["alloc
[features]
default = ["std"]
std = ["cbor4ii/use_std", "cid/std", "serde/std", "serde_bytes/std"]
# Prevent deserializing CIDs as bytes as much as possible.
no-cid-as-bytes = []
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,16 @@ fn main() -> Result<(), Box<dyn Error>> {
```


Features
--------

### `no-cid-as-bytes`

Sometimes it is desired that a CID is not accidentally deserialized into bytes. This can happen because the intermediate serde data model does not retain enough information to be able to differentiate between a bytes container and a CID container when there is a conflicting choice to be made, as in the case of some enum cases. The `no-cid-as-bytes` feature can be enabled in order to error at runtime in such cases.

The drawback of this feature is that it breaks Serde's derive attributes for [internally tagged enums](https://serde.rs/enum-representations.html#internally-tagged) (`#[serde(tag = "sometag")]`) and [untagged enums](https://serde.rs/enum-representations.html#untagged) (`#[serde(untagged)]`). If this feature is enabled and you still need similar functionality, you can implement the deserializer manually. Examples of how to do that are in the [enum example](examples/enums.rs).


License
-------

Expand Down
138 changes: 138 additions & 0 deletions examples/enums.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/// Serde untagged (`#[serde(untagged)]`) and internally tagged enums (`#[serde(tag = "tag")]`) are
/// not supported by CIDs. Here examples are provided on how to implement similar behaviour. This
/// file also contains an example for a kinded enum.
use std::convert::{TryFrom, TryInto};

use cid::Cid;
use libipld_core::ipld::Ipld;
use serde::{de, Deserialize};
use serde_bytes::ByteBuf;
use serde_ipld_dagcbor::from_slice;

/// The CID `bafkreibme22gw2h7y2h7tg2fhqotaqjucnbc24deqo72b6mkl2egezxhvy` encoded as CBOR
/// 42(h'00015512202C26B46B68FFC68FF99B453C1D30413413422D706483BFA0F98A5E886266E7AE')
///
/// Byte layout, as relied upon by `main` below:
/// - bytes `0..2` (`0xd8, 0x2a`): the tag 42 prefix,
/// - bytes `2..4` (`0x58, 0x25`): the bytes type identifier and the length,
/// - bytes `4..`: the content of the byte string.
const CBOR_CID_FIXTURE: [u8; 41] = [
0xd8, 0x2a, 0x58, 0x25, 0x00, 0x01, 0x55, 0x12, 0x20, 0x2c, 0x26, 0xb4, 0x6b, 0x68, 0xff, 0xc6,
0x8f, 0xf9, 0x9b, 0x45, 0x3c, 0x1d, 0x30, 0x41, 0x34, 0x13, 0x42, 0x2d, 0x70, 0x64, 0x83, 0xbf,
0xa0, 0xf9, 0x8a, 0x5e, 0x88, 0x62, 0x66, 0xe7, 0xae,
];

/// This enum shows how an internally tagged enum could be implemented.
#[derive(Debug, PartialEq)]
enum CidInInternallyTaggedEnum {
MyCid { cid: Cid },
}

// This manual deserializer implementation works as if you would derive `Deserialize` and add
// `#[serde(tag = "type")]` to the `CidInternallyTaggedEnum` enum.
impl<'de> de::Deserialize<'de> for CidInInternallyTaggedEnum {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
#[derive(Deserialize)]
struct Tagged {
r#type: String,
cid: Cid,
}

let Tagged { r#type, cid } = Deserialize::deserialize(deserializer)?;
if r#type == "MyCid" {
Ok(CidInInternallyTaggedEnum::MyCid { cid })
} else {
Err(de::Error::custom("No matching enum variant found"))
}
}
}

/// This enum shows how an untagged enum could be implemented.
#[derive(Debug, PartialEq)]
enum CidInUntaggedEnum {
MyCid(Cid),
}

// This manual deserializer implementation works as if you would derive `Deserialize` and add
// `#[serde(untagged)]`.
impl<'de> de::Deserialize<'de> for CidInUntaggedEnum {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
Cid::deserialize(deserializer)
.map(CidInUntaggedEnum::MyCid)
.map_err(|_| de::Error::custom("No matching enum variant found"))
}
}

/// Example of a kinded enum: the variant is chosen by the kind of the decoded data.
#[derive(Debug, PartialEq)]
pub enum Kinded {
    Bytes(ByteBuf),
    Link(Cid),
}

/// Converts an [`Ipld`] value into a [`Kinded`]; only the bytes and link kinds are accepted,
/// every other kind yields `Err(())`.
impl TryFrom<Ipld> for Kinded {
    type Error = ();

    fn try_from(ipld: Ipld) -> Result<Self, Self::Error> {
        let kinded = match ipld {
            Ipld::Link(cid) => Self::Link(cid),
            Ipld::Bytes(bytes) => Self::Bytes(ByteBuf::from(bytes)),
            // Any other kind has no corresponding variant.
            _ => return Err(()),
        };
        Ok(kinded)
    }
}

impl<'de> de::Deserialize<'de> for Kinded {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
Ipld::deserialize(deserializer).and_then(|ipld| {
ipld.try_into()
.map_err(|_| de::Error::custom("No matching enum variant found"))
})
}
}

/// Decodes the fixture through each of the example enums and checks the results.
pub fn main() {
    let expected_cid: Cid = from_slice(&CBOR_CID_FIXTURE).unwrap();

    // {"type": "MyCid", "cid": 42(h'00015512202C26B46B68FFC68FF99B453C1D30413413422D706483BFA0F98A5E886266E7AE')}
    // The map header and keys come first, the tagged CID is appended as the last value.
    let mut cbor_internally_tagged_enum: Vec<u8> = vec![
        0xa2, 0x64, 0x74, 0x79, 0x70, 0x65, 0x65, 0x4d, 0x79, 0x43, 0x69, 0x64, 0x63, 0x63, 0x69,
        0x64,
    ];
    cbor_internally_tagged_enum.extend_from_slice(&CBOR_CID_FIXTURE);
    let internally_tagged: CidInInternallyTaggedEnum =
        from_slice(&cbor_internally_tagged_enum).unwrap();
    assert_eq!(
        internally_tagged,
        CidInInternallyTaggedEnum::MyCid { cid: expected_cid }
    );

    let untagged: CidInUntaggedEnum = from_slice(&CBOR_CID_FIXTURE).unwrap();
    assert_eq!(untagged, CidInUntaggedEnum::MyCid(expected_cid));

    let kinded_link: Kinded = from_slice(&CBOR_CID_FIXTURE).unwrap();
    assert_eq!(kinded_link, Kinded::Link(expected_cid));

    // Dropping the two bytes of the tag 42 prefix leaves a plain CBOR byte string.
    let untagged_cbor = &CBOR_CID_FIXTURE[2..];
    // The decoded value contains neither the bytes type identifier nor the length, hence skip
    // another two bytes.
    let payload = untagged_cbor[2..].to_vec();
    assert_eq!(
        from_slice::<Kinded>(untagged_cbor).unwrap(),
        Kinded::Bytes(ByteBuf::from(payload))
    );
}

// Make it possible to run this example as a test: the test simply calls `main`, so any failing
// assertion in there fails the test.
#[test]
fn test_main() {
main()
}
5 changes: 5 additions & 0 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,11 @@ struct CidDeserializer<'a, R>(&'a mut Deserializer<R>);
impl<'de, 'a, R: dec::Read<'de>> de::Deserializer<'de> for &'a mut CidDeserializer<'a, R> {
type Error = DecodeError<R::Error>;

#[cfg(not(feature = "no-cid-as-bytes"))]
fn deserialize_any<V: de::Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
self.deserialize_bytes(visitor)
}
#[cfg(feature = "no-cid-as-bytes")]
fn deserialize_any<V: de::Visitor<'de>>(self, _visitor: V) -> Result<V::Value, Self::Error> {
Err(de::Error::custom(
"Only bytes can be deserialized into a CID",
Expand Down
91 changes: 91 additions & 0 deletions tests/cid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,32 @@ fn test_cid_not_as_bytes() {
.expect_err("shouldn't have parsed a tagged CID as a byte array");
from_slice::<serde_bytes::ByteBuf>(&cbor_cid[2..])
.expect("should have parsed an untagged CID as a byte array");

#[derive(Debug, Deserialize, PartialEq)]
struct NewType(ByteBuf);

#[derive(Debug, Deserialize, PartialEq)]
#[serde(untagged)]
enum BytesInEnum {
MyCid(NewType),
}

// With the `no-cid-as-bytes` feature enabled, we make sure that it will error, when we try to
// decode a CID as bytes.
#[cfg(feature = "no-cid-as-bytes")]
from_slice::<BytesInEnum>(&cbor_cid)
.expect_err("shouldn't have parsed a tagged CID as byte array");

// With that feature disabled, then it will decode the CID (without the TAG and the zero
// prefix) as bytes.
#[cfg(not(feature = "no-cid-as-bytes"))]
{
let cid_without_tag = &cbor_cid[5..];
assert_eq!(
from_slice::<BytesInEnum>(&cbor_cid).unwrap(),
BytesInEnum::MyCid(NewType(ByteBuf::from(cid_without_tag)))
vmx marked this conversation as resolved.
Show resolved Hide resolved
);
}
}

/// Test whether a binary CID could be serialized if it isn't prefixed by tag 42. It should fail.
Expand Down Expand Up @@ -219,6 +245,71 @@ fn test_cid_in_kinded_enum_with_newtype() {
assert!(decoded_random_bytes.is_err());
}

/// Checks how a CID nested in an externally tagged, an internally tagged and an untagged enum is
/// decoded, both with and without the `no-cid-as-bytes` feature.
#[test]
fn test_cid_in_tagged_enum() {
    #[derive(Debug, Deserialize, PartialEq)]
    pub enum Externally {
        Cid(Cid),
    }

    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(tag = "type")]
    pub enum Internally {
        Cid { cid: Cid },
    }

    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(untagged)]
    pub enum Untagged {
        Cid(Cid),
    }

    let cbor_cid = [
        0xd8, 0x2a, 0x58, 0x25, 0x00, 0x01, 0x55, 0x12, 0x20, 0x2c, 0x26, 0xb4, 0x6b, 0x68, 0xff,
        0xc6, 0x8f, 0xf9, 0x9b, 0x45, 0x3c, 0x1d, 0x30, 0x41, 0x34, 0x13, 0x42, 0x2d, 0x70, 0x64,
        0x83, 0xbf, 0xa0, 0xf9, 0x8a, 0x5e, 0x88, 0x62, 0x66, 0xe7, 0xae,
    ];

    // {"Cid": cid}
    let cbor_map1 = [vec![0xa1, 0x63, 0x43, 0x69, 0x64], Vec::from(cbor_cid)].concat();

    // {"type": "Cid", "cid": cid}
    // Note that the bytes encode the "type" key first, followed by the "cid" key.
    let cbor_map2 = [
        vec![
            0xa2, 0x64, 0x74, 0x79, 0x70, 0x65, 0x63, 0x43, 0x69, 0x64, 0x63, 0x63, 0x69, 0x64,
        ],
        Vec::from(cbor_cid),
    ]
    .concat();

    // Skip the tag, the byte string header and the zero prefix to get to the binary CID.
    let cid = Cid::try_from(&cbor_cid[5..]).unwrap();

    let decoded: Externally = from_slice(&cbor_map1).unwrap();
    assert_eq!(decoded, Externally::Cid(cid));

    // With the `no-cid-as-bytes` feature enabled, it's not possible to use internally tagged or
    // untagged enums. This behaviour is *not* intentional, but incidental due to how Serde
    // works internally. This test is only added to see what one could expect, and to get
    // notified in case it ever gets supported.
    #[cfg(feature = "no-cid-as-bytes")]
    {
        from_slice::<Internally>(&cbor_map2)
            .expect_err("shouldn't be able to decode the internally tagged enum");
        from_slice::<Untagged>(&cbor_cid)
            .expect_err("shouldn't be able to decode the untagged enum");
    }

    // With that feature disabled, it's the expected desired behaviour.
    #[cfg(not(feature = "no-cid-as-bytes"))]
    {
        let decoded: Internally = from_slice(&cbor_map2).unwrap();
        assert_eq!(decoded, Internally::Cid { cid });

        let decoded: Untagged = from_slice(&cbor_cid).unwrap();
        assert_eq!(decoded, Untagged::Cid(cid));
    }
}

#[test]
fn test_cid_empty_errors() {
// Tag 42 with zero bytes
Expand Down
Loading