diff --git a/etc/test-data/osv/RSEC-2023-6.yaml b/etc/test-data/osv/RSEC-2023-6.yaml new file mode 100644 index 00000000..31b58486 --- /dev/null +++ b/etc/test-data/osv/RSEC-2023-6.yaml @@ -0,0 +1,41 @@ +id: RSEC-2023-6 +details: The commonmark package, specifically in its dependency on GitHub Flavored Markdown before version 0.29.0.gfm.1, + has a vulnerability related to time complexity. Parsing certain crafted markdown tables can take O(n * n) time, + leading to potential Denial of Service attacks. This issue does not affect the upstream cmark project and has been + fixed in version 0.29.0.gfm.1. +summary: Denial of Service (DoS) vulnerability +affected: + - package: + name: commonmark + ecosystem: CRAN + ranges: + - type: ECOSYSTEM + events: + - introduced: "0.2" + - fixed: "1.8" + versions: + - "0.2" + - "0.4" + - "0.5" + - "0.6" + - "0.7" + - "0.8" + - "0.9" + - "1.0" + - "1.1" + - "1.2" + - "1.4" + - "1.5" + - "1.6" + - "1.7" +references: + - type: WEB + url: https://security-tracker.debian.org/tracker/CVE-2020-5238 + - type: WEB + url: https://github.com/r-lib/commonmark/issues/13 + - type: WEB + url: https://github.com/r-lib/commonmark/pull/18 +aliases: + - CVE-2020-5238 +modified: "2023-10-20T07:27:00.600Z" +published: "2023-10-06T05:00:00.600Z" diff --git a/modules/ingestor/src/service/format.rs b/modules/ingestor/src/service/format.rs index 24a22d68..dd01f028 100644 --- a/modules/ingestor/src/service/format.rs +++ b/modules/ingestor/src/service/format.rs @@ -30,7 +30,7 @@ use tracing::instrument; use trustify_common::hashing::Digests; use trustify_entity::labels::Labels; -#[derive(Debug)] +#[derive(Clone, Copy, Debug)] pub enum Format { OSV, CSAF, @@ -74,7 +74,13 @@ impl<'g> Format { Format::OSV => { // issuer is :shrug: sometimes we can tell, sometimes not :shrug: let loader = OsvLoader::new(graph); - let osv: Vulnerability = serde_json::from_slice(&buffer)?; + let osv: Vulnerability = serde_json::from_slice(&buffer) + .map_err(Error::from) + 
.or_else(|_| { + serde_yml::from_slice::(&buffer) + .map(|osv| osv.0) + .map_err(Error::from) + })?; loader.load(labels, osv, digests, issuer).await } Format::CVE => { @@ -173,12 +179,21 @@ impl<'g> Format { } pub fn is_osv(bytes: &[u8]) -> Result { + Ok(Self::is_osv_json(bytes)? || Self::is_osv_yaml(bytes)?) + } + + pub fn is_osv_json(bytes: &[u8]) -> Result { match masked(depth(1).and(key("id")), bytes) { Ok(Some(_)) => Ok(true), Err(_) | Ok(None) => Ok(false), } } + pub fn is_osv_yaml(bytes: &[u8]) -> Result { + // TODO: find a way to detect format with streaming + Ok(serde_yml::from_slice::(bytes).is_ok()) + } + pub fn is_spdx(bytes: &[u8]) -> Result { match masked(depth(1).and(key("spdxVersion")), bytes) { Ok(Some(x)) if matches!(x.as_str(), "SPDX-2.2" | "SPDX-2.3") => Ok(true), @@ -264,6 +279,9 @@ fn masked(mask: N, bytes: &[u8]) -> Result, Error> { .transpose() } +#[derive(serde::Deserialize)] +struct Nested(#[serde(with = "serde_yml::with::singleton_map_recursive")] Vulnerability); + #[cfg(test)] mod test { use super::*; @@ -281,6 +299,9 @@ mod test { let osv = document_bytes("osv/RUSTSEC-2021-0079.json").await?; assert!(matches!(Format::from_bytes(&osv), Ok(Format::OSV))); + let osv = document_bytes("osv/RSEC-2023-6.yaml").await?; + assert!(matches!(Format::from_bytes(&osv), Ok(Format::OSV))); + let cve = document_bytes("mitre/CVE-2024-27088.json").await?; assert!(matches!(Format::from_bytes(&cve), Ok(Format::CVE))); diff --git a/modules/ingestor/tests/reingest/osv.rs b/modules/ingestor/tests/reingest/osv.rs index f433da37..c7f5c024 100644 --- a/modules/ingestor/tests/reingest/osv.rs +++ b/modules/ingestor/tests/reingest/osv.rs @@ -1,62 +1,135 @@ #![allow(clippy::expect_used)] use anyhow::bail; +use std::future::Future; use test_context::test_context; use test_log::test; use trustify_common::id::Id; use trustify_module_ingestor::model::IngestResult; +use trustify_module_ingestor::service::Format; use trustify_test_context::TrustifyContext; 
#[test_context(TrustifyContext)] #[test(tokio::test)] -async fn reingest(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { - async fn assert(ctx: &TrustifyContext, result: IngestResult) -> anyhow::Result<()> { - let Id::Uuid(id) = result.id else { - bail!("must be an id") - }; - let adv = ctx - .graph - .get_advisory_by_id(id, ()) - .await? - .expect("must be found"); - - assert_eq!(adv.vulnerabilities(()).await?.len(), 1); - - let all = adv.vulnerabilities(&()).await?; - assert_eq!(all.len(), 1); - assert_eq!( - all[0].advisory_vulnerability.vulnerability_id, - "CVE-2021-32714" - ); - - let all = ctx.graph.get_vulnerabilities(()).await?; - assert_eq!(all.len(), 1); - - let vuln = ctx - .graph - .get_vulnerability("CVE-2021-32714", ()) - .await? - .expect("Must be found"); - - assert_eq!(vuln.vulnerability.id, "CVE-2021-32714"); - - let descriptions = vuln.descriptions("en", ()).await?; - assert_eq!(descriptions.len(), 0); - - Ok(()) - } +async fn reingest_json(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + reingest( + ctx, + "osv/RUSTSEC-2021-0079.json", + Format::OSV, + |ctx, result| async move { + assert_common(ctx, &result, "CVE-2021-32714").await?; + Ok(()) + }, + ) + .await +} + +#[test_context(TrustifyContext)] +#[test(tokio::test)] +async fn reingest_json_unknown(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + reingest( + ctx, + "osv/RUSTSEC-2021-0079.json", + Format::Unknown, + |ctx, result| async move { + assert_common(ctx, &result, "CVE-2021-32714").await?; + Ok(()) + }, + ) + .await +} + +#[test_context(TrustifyContext)] +#[test(tokio::test)] +async fn reingest_yaml(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + reingest( + ctx, + "osv/RSEC-2023-6.yaml", + Format::OSV, + |ctx, result| async move { + assert_common(ctx, &result, "CVE-2020-5238").await?; + Ok(()) + }, + ) + .await +} +#[test_context(TrustifyContext)] +#[test(tokio::test)] +async fn reingest_yaml_unknown(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + 
reingest( + ctx, + "osv/RSEC-2023-6.yaml", + Format::Unknown, + |ctx, result| async move { + assert_common(ctx, &result, "CVE-2020-5238").await?; + Ok(()) + }, + ) + .await +} + +async fn reingest<'a, F, Fut>( + ctx: &'a TrustifyContext, + file: &'static str, + format: Format, + assert: F, +) -> Result<(), anyhow::Error> +where + F: Fn(&'a TrustifyContext, IngestResult) -> Fut + 'a, + Fut: Future> + 'a, +{ // ingest once - let result = ctx.ingest_document("osv/RUSTSEC-2021-0079.json").await?; + let result = ctx.ingest_document_as(file, format).await?; assert(ctx, result).await?; // ingest second time - let result = ctx.ingest_document("osv/RUSTSEC-2021-0079.json").await?; + let result = ctx.ingest_document_as(file, format).await?; assert(ctx, result).await?; // done Ok(()) } + +async fn assert_common( + ctx: &TrustifyContext, + result: &IngestResult, + expected_vuln_id: &str, +) -> anyhow::Result<()> { + let Id::Uuid(id) = result.id else { + bail!("must be an id") + }; + let adv = ctx + .graph + .get_advisory_by_id(id, ()) + .await? + .expect("must be found"); + + assert_eq!(adv.vulnerabilities(()).await?.len(), 1); + + let all = adv.vulnerabilities(&()).await?; + assert_eq!(all.len(), 1); + assert_eq!( + all[0].advisory_vulnerability.vulnerability_id, + expected_vuln_id + ); + + let all = ctx.graph.get_vulnerabilities(()).await?; + assert_eq!(all.len(), 1); + + let vuln = ctx + .graph + .get_vulnerability(expected_vuln_id, ()) + .await? + .expect("Must be found"); + + assert_eq!(vuln.vulnerability.id, expected_vuln_id); + + let descriptions = vuln.descriptions("en", ()).await?; + assert_eq!(descriptions.len(), 0); + + Ok(()) +} diff --git a/test-context/src/lib.rs b/test-context/src/lib.rs index ee767ea3..bf3631b7 100644 --- a/test-context/src/lib.rs +++ b/test-context/src/lib.rs @@ -65,11 +65,20 @@ impl TrustifyContext { Ok(results) } + /// Same as [`Self::ingest_document_as`], but with a format of [`Format::Unknown`]. 
pub async fn ingest_document(&self, path: &str) -> Result { + self.ingest_document_as(path, Format::Unknown).await + } + + pub async fn ingest_document_as( + &self, + path: &str, + format: Format, + ) -> Result { let bytes = document_bytes(path).await?; Ok(self .ingestor - .ingest(&bytes, Format::Unknown, ("source", "TrustifyContext"), None) + .ingest(&bytes, format, ("source", "TrustifyContext"), None) .await?) }