Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: introduce no-cid-as-bytes feature #23

Merged
merged 4 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,9 @@ jobs:
shell: bash

- name: Test
run: cargo test --workspace
run: cargo test --all-targets --workspace
shell: bash

- name: Test no-cid-as-bytes feature
run: cargo test --all-targets --workspace --features no-cid-as-bytes
shell: bash
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@ serde_bytes = { version = "0.11.9", default-features = false, features = ["alloc
[features]
default = ["std"]
std = ["cbor4ii/use_std", "cid/std", "serde/std", "serde_bytes/std"]
# Prevent deserializing CIDs as bytes as much as possible.
no-cid-as-bytes = []
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,16 @@ fn main() -> Result<(), Box<dyn Error>> {
```


Features
--------

### `no-cid-as-bytes`

Sometimes it is desired that a CID is not accidentally deserialized into bytes. The `no-cid-as-bytes` feature can be enabled in order to error at runtime in such cases.
vmx marked this conversation as resolved.
Show resolved Hide resolved

The drawback of this feature is that it breaks Serde's derive attributes for [internally tagged enums](https://serde.rs/enum-representations.html#internally-tagged) (`#[serde(tag = "sometag")]`) and [untagged enums](https://serde.rs/enum-representations.html#untagged) (`#[serde(untagged)]`). If this feature is enabled and you still need similar functionality, you can implement a deserializer manually. Examples of how to do that are in the [enum example](examples/enums.rs).


License
-------

Expand Down
138 changes: 138 additions & 0 deletions examples/enums.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/// Serde untagged (`#[serde(untagged)]`) and internally tagged enums (`#[serde(tag = "tag")]`) are
/// not supported by CIDs. Examples of how to implement similar behaviour are provided here. This
/// file also contains an example for a kinded enum.
use std::convert::{TryFrom, TryInto};

use cid::Cid;
use libipld_core::ipld::Ipld;
use serde::{de, Deserialize};
use serde_bytes::ByteBuf;
use serde_ipld_dagcbor::from_slice;

/// The CID `bafkreibme22gw2h7y2h7tg2fhqotaqjucnbc24deqo72b6mkl2egezxhvy` encoded as CBOR
/// 42(h'00015512202C26B46B68FFC68FF99B453C1D30413413422D706483BFA0F98A5E886266E7AE')
///
/// Layout of the 41 bytes:
/// - bytes `0..2` (`0xd8, 0x2a`): the tag 42 prefix,
/// - bytes `2..4` (`0x58, 0x25`): the byte string type identifier and its length (37),
/// - bytes `4..41`: the 37 payload bytes shown in the `h'...'` notation above.
///
/// `main` slices this fixture at offset 2 to obtain the same payload as an untagged byte string.
const CBOR_CID_FIXTURE: [u8; 41] = [
0xd8, 0x2a, 0x58, 0x25, 0x00, 0x01, 0x55, 0x12, 0x20, 0x2c, 0x26, 0xb4, 0x6b, 0x68, 0xff, 0xc6,
0x8f, 0xf9, 0x9b, 0x45, 0x3c, 0x1d, 0x30, 0x41, 0x34, 0x13, 0x42, 0x2d, 0x70, 0x64, 0x83, 0xbf,
0xa0, 0xf9, 0x8a, 0x5e, 0x88, 0x62, 0x66, 0xe7, 0xae,
];

/// This enum shows how an internally tagged enum could be implemented.
///
/// `Deserialize` is intentionally not derived here; it is implemented by hand for this type in
/// this file, using a `"type"` field as the tag.
#[derive(Debug, PartialEq)]
enum CidInInternallyTaggedEnum {
// Selected when the `"type"` field of the encoded map equals `"MyCid"`.
MyCid { cid: Cid },
}

// This manual deserializer implementation works as if you would derive `Deserialize` and add
// `#[serde(tag = "type")]` to the `CidInternallyTaggedEnum` enum.
impl<'de> de::Deserialize<'de> for CidInInternallyTaggedEnum {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
#[derive(Deserialize)]
struct Tagged {
r#type: String,
cid: Cid,
}

let Tagged { r#type, cid } = Deserialize::deserialize(deserializer)?;
if r#type == "MyCid" {
Ok(CidInInternallyTaggedEnum::MyCid { cid })
} else {
Err(de::Error::custom("No matching enum variant found"))
}
}
}

/// This enum shows how an untagged enum could be implemented.
///
/// `Deserialize` is intentionally not derived here; it is implemented by hand for this type in
/// this file by attempting to decode the input directly as a `Cid`.
#[derive(Debug, PartialEq)]
enum CidInUntaggedEnum {
// The only variant; wraps the decoded CID.
MyCid(Cid),
}

// This manual deserializer implementation works as if you would derive `Deserialize` and add
// `#[serde(untagged)]`.
impl<'de> de::Deserialize<'de> for CidInUntaggedEnum {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
Cid::deserialize(deserializer)
.map(CidInUntaggedEnum::MyCid)
.map_err(|_| de::Error::custom("No matching enum variant found"))
}
}

/// This enum shows how a kinded enum could be implemented.
///
/// The variant is chosen by the kind of the decoded IPLD value (bytes vs. link), see the
/// `TryFrom<Ipld>` implementation in this file.
#[derive(Debug, PartialEq)]
pub enum Kinded {
// Produced from `Ipld::Bytes`.
Bytes(ByteBuf),
// Produced from `Ipld::Link`.
Link(Cid),
}

impl TryFrom<Ipld> for Kinded {
type Error = ();

fn try_from(ipld: Ipld) -> Result<Self, Self::Error> {
match ipld {
Ipld::Bytes(bytes) => Ok(Self::Bytes(ByteBuf::from(bytes))),
Ipld::Link(cid) => Ok(Self::Link(cid)),
_ => Err(()),
}
}
}

impl<'de> de::Deserialize<'de> for Kinded {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
Ipld::deserialize(deserializer).and_then(|ipld| {
ipld.try_into()
.map_err(|_| de::Error::custom("No matching enum variant found"))
})
}
}

/// Decodes the CID fixture through each of the manually implemented enums and checks the
/// results. Panics (via `unwrap`/`assert_eq!`) if any decoding step misbehaves.
pub fn main() {
    let expected_cid: Cid = from_slice(&CBOR_CID_FIXTURE).unwrap();

    // Internally tagged enum, encoded as
    // {"type": "MyCid", "cid": 42(h'00015512202C26B46B68FFC68FF99B453C1D30413413422D706483BFA0F98A5E886266E7AE')}
    // The literal below is the map header plus both keys and the tag value; the CID itself is
    // appended from the fixture.
    let mut internally_tagged_cbor = vec![
        0xa2, 0x64, 0x74, 0x79, 0x70, 0x65, 0x65, 0x4d, 0x79, 0x43, 0x69, 0x64, 0x63, 0x63, 0x69,
        0x64,
    ];
    internally_tagged_cbor.extend_from_slice(&CBOR_CID_FIXTURE);
    let internally_tagged: CidInInternallyTaggedEnum =
        from_slice(&internally_tagged_cbor).unwrap();
    assert_eq!(
        internally_tagged,
        CidInInternallyTaggedEnum::MyCid { cid: expected_cid }
    );

    // Untagged enum: the input is just the tagged CID.
    let untagged: CidInUntaggedEnum = from_slice(&CBOR_CID_FIXTURE).unwrap();
    assert_eq!(untagged, CidInUntaggedEnum::MyCid(expected_cid));

    // Kinded enum, link kind.
    let link: Kinded = from_slice(&CBOR_CID_FIXTURE).unwrap();
    assert_eq!(link, Kinded::Link(expected_cid));

    // Kinded enum, bytes kind: dropping the two-byte tag 42 prefix leaves a plain CBOR byte
    // string.
    let byte_string_cbor = &CBOR_CID_FIXTURE[2..];
    let decoded_bytes: Kinded = from_slice(byte_string_cbor).unwrap();
    // The decoded payload contains neither the bytes type identifier nor the length, so two
    // more bytes are skipped to build the expected value.
    let expected_payload = ByteBuf::from(byte_string_cbor[2..].to_vec());
    assert_eq!(decoded_bytes, Kinded::Bytes(expected_payload));
}

// Lets `cargo test` exercise the example: the test simply runs `main`, whose assertions panic
// on failure.
#[test]
fn test_main() {
    main();
}
5 changes: 5 additions & 0 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,11 @@ struct CidDeserializer<'a, R>(&'a mut Deserializer<R>);
impl<'de, 'a, R: dec::Read<'de>> de::Deserializer<'de> for &'a mut CidDeserializer<'a, R> {
type Error = DecodeError<R::Error>;

// Default behaviour (the `no-cid-as-bytes` feature is disabled): a request for "any" value is
// answered by delegating to `deserialize_bytes`. When the feature is enabled, a different
// `deserialize_any` is compiled in instead.
#[cfg(not(feature = "no-cid-as-bytes"))]
fn deserialize_any<V: de::Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
self.deserialize_bytes(visitor)
}
#[cfg(feature = "no-cid-as-bytes")]
fn deserialize_any<V: de::Visitor<'de>>(self, _visitor: V) -> Result<V::Value, Self::Error> {
Err(de::Error::custom(
"Only bytes can be deserialized into a CID",
Expand Down
65 changes: 65 additions & 0 deletions tests/cid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,21 @@ fn test_cid_not_as_bytes() {
.expect_err("shouldn't have parsed a tagged CID as a byte array");
from_slice::<serde_bytes::ByteBuf>(&cbor_cid[2..])
.expect("should have parsed an untagged CID as a byte array");

#[cfg(feature = "no-cid-as-bytes")]
{
#[derive(Debug, Deserialize)]
struct NewType(ByteBuf);

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum BytesInEnum {
MyCid(NewType),
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I'm interested in is what happens when you do this without the feature. Can you add a test for that with these types and run the assertions as expected but also document that this behaviour may be undesirable? This is the key point of the issue right here, isn't it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rvagg sorry for the delay, I've pushed a new commit which should address your concerns. If not, please let me know.


from_slice::<BytesInEnum>(&cbor_cid)
.expect_err("shouldn't have parsed a tagged CID as byte array");
}
}

/// Test whether a binary CID could be serialized if it isn't prefixed by tag 42. It should fail.
Expand Down Expand Up @@ -219,6 +234,56 @@ fn test_cid_in_kinded_enum_with_newtype() {
assert!(decoded_random_bytes.is_err());
}

/// Checks that a CID can be decoded inside externally tagged, internally tagged and untagged
/// enums that derive `Deserialize`. Only compiled when the `no-cid-as-bytes` feature is off.
#[cfg(not(feature = "no-cid-as-bytes"))]
#[test]
fn test_cid_in_tagged_enum() {
    #[derive(Debug, Deserialize, PartialEq)]
    pub enum Externally {
        Cid(Cid),
    }

    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(tag = "type")]
    pub enum Internally {
        Cid { cid: Cid },
    }

    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(untagged)]
    pub enum Untagged {
        Cid(Cid),
    }

    // Tag 42, followed by a byte string holding the CID.
    let cbor_cid = [
        0xd8, 0x2a, 0x58, 0x25, 0x00, 0x01, 0x55, 0x12, 0x20, 0x2c, 0x26, 0xb4, 0x6b, 0x68, 0xff,
        0xc6, 0x8f, 0xf9, 0x9b, 0x45, 0x3c, 0x1d, 0x30, 0x41, 0x34, 0x13, 0x42, 0x2d, 0x70, 0x64,
        0x83, 0xbf, 0xa0, 0xf9, 0x8a, 0x5e, 0x88, 0x62, 0x66, 0xe7, 0xae,
    ];

    // Skip the five bytes that precede the binary CID (`0xd8 0x2a 0x58 0x25 0x00`).
    let expected = Cid::try_from(&cbor_cid[5..]).unwrap();

    // {"Cid": cid}
    let mut externally_tagged_cbor = vec![0xa1, 0x63, 0x43, 0x69, 0x64];
    externally_tagged_cbor.extend_from_slice(&cbor_cid);
    assert_eq!(
        from_slice::<Externally>(&externally_tagged_cbor).unwrap(),
        Externally::Cid(expected)
    );

    // {"type": "Cid", "cid": cid}
    let mut internally_tagged_cbor = vec![
        0xa2, 0x64, 0x74, 0x79, 0x70, 0x65, 0x63, 0x43, 0x69, 0x64, 0x63, 0x63, 0x69, 0x64,
    ];
    internally_tagged_cbor.extend_from_slice(&cbor_cid);
    assert_eq!(
        from_slice::<Internally>(&internally_tagged_cbor).unwrap(),
        Internally::Cid { cid: expected }
    );

    // The bare tagged CID.
    assert_eq!(
        from_slice::<Untagged>(&cbor_cid).unwrap(),
        Untagged::Cid(expected)
    );
}

#[test]
fn test_cid_empty_errors() {
// Tag 42 with zero bytes
Expand Down
Loading