diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3b85863..0436ac5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -37,5 +37,9 @@ jobs: shell: bash - name: Test - run: cargo test --workspace + run: cargo test --all-targets --workspace + shell: bash + + - name: Test no-cid-as-bytes feature + run: cargo test --all-targets --workspace --features no-cid-as-bytes shell: bash diff --git a/Cargo.toml b/Cargo.toml index 3682cb8..500efd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,3 +28,5 @@ serde_bytes = { version = "0.11.9", default-features = false, features = ["alloc [features] default = ["std"] std = ["cbor4ii/use_std", "cid/std", "serde/std", "serde_bytes/std"] +# Prevent deserializing CIDs as bytes as much as possible. +no-cid-as-bytes = [] diff --git a/README.md b/README.md index 39754e7..4c322a4 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,16 @@ fn main() -> Result<(), Box> { ``` +Features +-------- + +### `no-cid-as-bytes` + +Sometimes it is desired that a CID is not accidentally deserialized into bytes. This can happen because the intermediate Serde data model does not retain enough information to differentiate between a bytes container and a CID container when there is a conflicting choice to be made, as is the case for some enums. The `no-cid-as-bytes` feature can be enabled in order to error at runtime in such cases. + +The drawback of that feature is that it breaks Serde's derive attributes for [internally tagged enums](https://serde.rs/enum-representations.html#internally-tagged) (`#[serde(tag = "sometag")]`) and [untagged enums](https://serde.rs/enum-representations.html#untagged) (`#[serde(untagged)]`). If this feature is enabled and you still need similar functionality, you could implement a deserializer manually. Examples of how to do that are in the [enum example](examples/enums.rs). 
+ + License ------- diff --git a/examples/enums.rs b/examples/enums.rs new file mode 100644 index 0000000..dd19b9b --- /dev/null +++ b/examples/enums.rs @@ -0,0 +1,138 @@ +//! Serde untagged (`#[serde(untagged)]`) and internally tagged enums (`#[serde(tag = "tag")]`) are +//! not supported by CIDs. Here examples are provided on how to implement similar behaviour. This +//! file also contains an example for a kinded enum. +use std::convert::{TryFrom, TryInto}; + +use cid::Cid; +use libipld_core::ipld::Ipld; +use serde::{de, Deserialize}; +use serde_bytes::ByteBuf; +use serde_ipld_dagcbor::from_slice; + +/// The CID `bafkreibme22gw2h7y2h7tg2fhqotaqjucnbc24deqo72b6mkl2egezxhvy` encoded as CBOR +/// 42(h'00015512202C26B46B68FFC68FF99B453C1D30413413422D706483BFA0F98A5E886266E7AE') +const CBOR_CID_FIXTURE: [u8; 41] = [ + 0xd8, 0x2a, 0x58, 0x25, 0x00, 0x01, 0x55, 0x12, 0x20, 0x2c, 0x26, 0xb4, 0x6b, 0x68, 0xff, 0xc6, + 0x8f, 0xf9, 0x9b, 0x45, 0x3c, 0x1d, 0x30, 0x41, 0x34, 0x13, 0x42, 0x2d, 0x70, 0x64, 0x83, 0xbf, + 0xa0, 0xf9, 0x8a, 0x5e, 0x88, 0x62, 0x66, 0xe7, 0xae, +]; + +/// This enum shows how an internally tagged enum could be implemented. +#[derive(Debug, PartialEq)] +enum CidInInternallyTaggedEnum { + MyCid { cid: Cid }, +} + +// This manual deserializer implementation works as if you would derive `Deserialize` and add +// `#[serde(tag = "type")]` to the `CidInInternallyTaggedEnum` enum. +impl<'de> de::Deserialize<'de> for CidInInternallyTaggedEnum { + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + #[derive(Deserialize)] + struct Tagged { + r#type: String, + cid: Cid, + } + + let Tagged { r#type, cid } = Deserialize::deserialize(deserializer)?; + if r#type == "MyCid" { + Ok(CidInInternallyTaggedEnum::MyCid { cid }) + } else { + Err(de::Error::custom("No matching enum variant found")) + } + } +} + +/// This enum shows how an untagged enum could be implemented. 
+#[derive(Debug, PartialEq)] +enum CidInUntaggedEnum { + MyCid(Cid), +} + +// This manual deserializer implementation works as if you would derive `Deserialize` and add +// `#[serde(untagged)]`. +impl<'de> de::Deserialize<'de> for CidInUntaggedEnum { + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + Cid::deserialize(deserializer) + .map(CidInUntaggedEnum::MyCid) + .map_err(|_| de::Error::custom("No matching enum variant found")) + } +} + +/// This enum shows how a kinded enum could be implemented. +#[derive(Debug, PartialEq)] +pub enum Kinded { + Bytes(ByteBuf), + Link(Cid), +} + +impl TryFrom for Kinded { + type Error = (); + + fn try_from(ipld: Ipld) -> Result { + match ipld { + Ipld::Bytes(bytes) => Ok(Self::Bytes(ByteBuf::from(bytes))), + Ipld::Link(cid) => Ok(Self::Link(cid)), + _ => Err(()), + } + } +} + +impl<'de> de::Deserialize<'de> for Kinded { + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + Ipld::deserialize(deserializer).and_then(|ipld| { + ipld.try_into() + .map_err(|_| de::Error::custom("No matching enum variant found")) + }) + } +} + +pub fn main() { + let cid: Cid = from_slice(&CBOR_CID_FIXTURE).unwrap(); + + // {"type": "MyCid", "cid": 42(h'00015512202C26B46B68FFC68FF99B453C1D30413413422D706483BFA0F98A5E886266E7AE')} + let cbor_internally_tagged_enum = [ + &[ + 0xa2, 0x64, 0x74, 0x79, 0x70, 0x65, 0x65, 0x4d, 0x79, 0x43, 0x69, 0x64, 0x63, 0x63, + 0x69, 0x64, + ], + &CBOR_CID_FIXTURE[..], + ] + .concat(); + assert_eq!( + from_slice::(&cbor_internally_tagged_enum).unwrap(), + CidInInternallyTaggedEnum::MyCid { cid } + ); + + assert_eq!( + from_slice::(&CBOR_CID_FIXTURE).unwrap(), + CidInUntaggedEnum::MyCid(cid) + ); + + assert_eq!( + from_slice::(&CBOR_CID_FIXTURE).unwrap(), + Kinded::Link(cid) + ); + + // The CID without the tag 42 prefix, so that it decodes as just bytes. 
+ let cbor_bytes = &CBOR_CID_FIXTURE[2..]; + let decoded_bytes: Kinded = from_slice(cbor_bytes).unwrap(); + // The CBOR decoded bytes don't contain the prefix with the bytes type identifier and the + // length. + let bytes = cbor_bytes[2..].to_vec(); + assert_eq!(decoded_bytes, Kinded::Bytes(ByteBuf::from(bytes))); +} + +// Make it possible to run this example as a test. +#[test] +fn test_main() { + main() +} diff --git a/src/de.rs b/src/de.rs index 330a8c2..1191234 100644 --- a/src/de.rs +++ b/src/de.rs @@ -618,6 +618,11 @@ struct CidDeserializer<'a, R>(&'a mut Deserializer); impl<'de, 'a, R: dec::Read<'de>> de::Deserializer<'de> for &'a mut CidDeserializer<'a, R> { type Error = DecodeError; + #[cfg(not(feature = "no-cid-as-bytes"))] + fn deserialize_any>(self, visitor: V) -> Result { + self.deserialize_bytes(visitor) + } + #[cfg(feature = "no-cid-as-bytes")] fn deserialize_any>(self, _visitor: V) -> Result { Err(de::Error::custom( "Only bytes can be deserialized into a CID", diff --git a/tests/cid.rs b/tests/cid.rs index 7369b7a..84fabec 100644 --- a/tests/cid.rs +++ b/tests/cid.rs @@ -82,6 +82,32 @@ fn test_cid_not_as_bytes() { .expect_err("shouldn't have parsed a tagged CID as a byte array"); from_slice::(&cbor_cid[2..]) .expect("should have parsed an untagged CID as a byte array"); + + #[derive(Debug, Deserialize, PartialEq)] + struct NewType(ByteBuf); + + #[derive(Debug, Deserialize, PartialEq)] + #[serde(untagged)] + enum BytesInEnum { + MyCid(NewType), + } + + // With the `no-cid-as-bytes` feature enabled, we make sure that it errors when we try to + // decode a CID as bytes. + #[cfg(feature = "no-cid-as-bytes")] + from_slice::(&cbor_cid) + .expect_err("shouldn't have parsed a tagged CID as byte array"); + + // With that feature disabled, it will decode the CID (without the tag and the zero + // prefix) as bytes. 
+ #[cfg(not(feature = "no-cid-as-bytes"))] + { + let cid_without_tag = &cbor_cid[5..]; + assert_eq!( + from_slice::(&cbor_cid).unwrap(), + BytesInEnum::MyCid(NewType(ByteBuf::from(cid_without_tag))) + ); + } } /// Test whether a binary CID could be serialized if it isn't prefixed by tag 42. It should fail. @@ -219,6 +245,71 @@ fn test_cid_in_kinded_enum_with_newtype() { assert!(decoded_random_bytes.is_err()); } +#[test] +fn test_cid_in_tagged_enum() { + #[derive(Debug, Deserialize, PartialEq)] + pub enum Externally { + Cid(Cid), + } + + #[derive(Debug, Deserialize, PartialEq)] + #[serde(tag = "type")] + pub enum Internally { + Cid { cid: Cid }, + } + + #[derive(Debug, Deserialize, PartialEq)] + #[serde(untagged)] + pub enum Untagged { + Cid(Cid), + } + + let cbor_cid = [ + 0xd8, 0x2a, 0x58, 0x25, 0x00, 0x01, 0x55, 0x12, 0x20, 0x2c, 0x26, 0xb4, 0x6b, 0x68, 0xff, + 0xc6, 0x8f, 0xf9, 0x9b, 0x45, 0x3c, 0x1d, 0x30, 0x41, 0x34, 0x13, 0x42, 0x2d, 0x70, 0x64, + 0x83, 0xbf, 0xa0, 0xf9, 0x8a, 0x5e, 0x88, 0x62, 0x66, 0xe7, 0xae, + ]; + + // {"Cid": cid} + let cbor_map1 = [vec![0xa1, 0x63, 0x43, 0x69, 0x64], Vec::from(cbor_cid)].concat(); + + // {"type": "Cid", "cid": cid} + let cbor_map2 = [ + vec![ + 0xa2, 0x64, 0x74, 0x79, 0x70, 0x65, 0x63, 0x43, 0x69, 0x64, 0x63, 0x63, 0x69, 0x64, + ], + Vec::from(cbor_cid), + ] + .concat(); + + let cid = Cid::try_from(&cbor_cid[5..]).unwrap(); + + let decoded: Externally = from_slice(&cbor_map1).unwrap(); + assert_eq!(decoded, Externally::Cid(cid)); + + // With the `no-cid-as-bytes` feature enabled, it's not possible to use internally tagged or + // untagged enums. This behaviour is *not* intentional, but incidental due to how Serde + // works internally. This test is only added to see what one could expect, and to get + // notified in case it ever gets supported. 
+ #[cfg(feature = "no-cid-as-bytes")] + { + from_slice::(&cbor_map2) + .expect_err("shouldn't be able to decode the intanlly tagged enum"); + from_slice::(&cbor_cid) + .expect_err("shouldn't be able to decode the untagged enum"); + } + + // With that feature disabled, decoding works as desired. + #[cfg(not(feature = "no-cid-as-bytes"))] + { + let decoded: Internally = from_slice(&cbor_map2).unwrap(); + assert_eq!(decoded, Internally::Cid { cid }); + + let decoded: Untagged = from_slice(&cbor_cid).unwrap(); + assert_eq!(decoded, Untagged::Cid(cid)); + } +} + #[test] fn test_cid_empty_errors() { // Tag 42 with zero bytes