Skip to content

Commit

Permalink
fix: allow ingesting YAML based OSV
Browse files Browse the repository at this point in the history
This not only requires processing of YAML files, but also detecting them
with the Format enum.
  • Loading branch information
ctron committed Sep 9, 2024
1 parent e257a56 commit ae0472f
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 41 deletions.
41 changes: 41 additions & 0 deletions etc/test-data/osv/RSEC-2023-6.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
id: RSEC-2023-6
details: The commonmark package, specifically in its dependency on GitHub Flavored Markdown before version 0.29.0.gfm.1,
has a vulnerability related to time complexity. Parsing certain crafted markdown tables can take O(n * n) time,
leading to potential Denial of Service attacks. This issue does not affect the upstream cmark project and has been
fixed in version 0.29.0.gfm.1.
summary: Denial of Service (DoS) vulnerability
affected:
- package:
name: commonmark
ecosystem: CRAN
ranges:
- type: ECOSYSTEM
events:
- introduced: "0.2"
- fixed: "1.8"
versions:
- "0.2"
- "0.4"
- "0.5"
- "0.6"
- "0.7"
- "0.8"
- "0.9"
- "1.0"
- "1.1"
- "1.2"
- "1.4"
- "1.5"
- "1.6"
- "1.7"
references:
- type: WEB
url: https://security-tracker.debian.org/tracker/CVE-2020-5238
- type: WEB
url: https://github.com/r-lib/commonmark/issues/13
- type: WEB
url: https://github.com/r-lib/commonmark/pull/18
aliases:
- CVE-2020-5238
modified: "2023-10-20T07:27:00.600Z"
published: "2023-10-06T05:00:00.600Z"
25 changes: 23 additions & 2 deletions modules/ingestor/src/service/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use tracing::instrument;
use trustify_common::hashing::Digests;
use trustify_entity::labels::Labels;

#[derive(Debug)]
#[derive(Clone, Copy, Debug)]
pub enum Format {
OSV,
CSAF,
Expand Down Expand Up @@ -74,7 +74,13 @@ impl<'g> Format {
Format::OSV => {
// issuer is :shrug: sometimes we can tell, sometimes not :shrug:
let loader = OsvLoader::new(graph);
let osv: Vulnerability = serde_json::from_slice(&buffer)?;
let osv: Vulnerability = serde_json::from_slice(&buffer)
.map_err(Error::from)
.or_else(|_| {
serde_yml::from_slice::<Nested>(&buffer)
.map(|osv| osv.0)
.map_err(Error::from)
})?;
loader.load(labels, osv, digests, issuer).await
}
Format::CVE => {
Expand Down Expand Up @@ -173,12 +179,21 @@ impl<'g> Format {
}

/// Check whether the document is an OSV document, in either its JSON or its YAML form.
///
/// The JSON check runs first; the YAML check is only attempted when the JSON check
/// reports `false`. An error from either check is propagated.
pub fn is_osv(bytes: &[u8]) -> Result<bool, Error> {
    if Self::is_osv_json(bytes)? {
        return Ok(true);
    }
    Self::is_osv_yaml(bytes)
}

/// Check whether the document looks like a JSON encoded OSV document.
///
/// Reports `true` when the masked lookup for an `id` key at depth 1 yields a value.
/// A failing lookup is not propagated; it is reported as `false`, just like a missing key.
pub fn is_osv_json(bytes: &[u8]) -> Result<bool, Error> {
    let id = masked(depth(1).and(key("id")), bytes);
    Ok(matches!(id, Ok(Some(_))))
}

/// Check whether the document can be parsed as a YAML encoded OSV document.
///
/// The whole buffer is deserialized into [`Nested`]; the parsed value is discarded and
/// only the success of the parse is reported. This function never returns `Err`.
pub fn is_osv_yaml(bytes: &[u8]) -> Result<bool, Error> {
    // TODO: find a way to detect format with streaming
    let parsed = serde_yml::from_slice::<Nested>(bytes);
    Ok(parsed.is_ok())
}

pub fn is_spdx(bytes: &[u8]) -> Result<bool, Error> {
match masked(depth(1).and(key("spdxVersion")), bytes) {
Ok(Some(x)) if matches!(x.as_str(), "SPDX-2.2" | "SPDX-2.3") => Ok(true),
Expand Down Expand Up @@ -264,6 +279,9 @@ fn masked<N: Mask>(mask: N, bytes: &[u8]) -> Result<Option<String>, Error> {
.transpose()
}

/// Newtype wrapper used to deserialize an OSV [`Vulnerability`] from YAML.
///
/// The wrapped value is deserialized through `serde_yml::with::singleton_map_recursive`;
/// callers take the parsed document out again via `.0`.
// NOTE(review): presumably required so that enums nested inside `Vulnerability` are read
// from single-key maps rather than YAML tags — confirm against the serde_yml documentation.
#[derive(serde::Deserialize)]
struct Nested(#[serde(with = "serde_yml::with::singleton_map_recursive")] Vulnerability);

#[cfg(test)]
mod test {
use super::*;
Expand All @@ -281,6 +299,9 @@ mod test {
let osv = document_bytes("osv/RUSTSEC-2021-0079.json").await?;
assert!(matches!(Format::from_bytes(&osv), Ok(Format::OSV)));

let osv = document_bytes("osv/RSEC-2023-6.yaml").await?;
assert!(matches!(Format::from_bytes(&osv), Ok(Format::OSV)));

let cve = document_bytes("mitre/CVE-2024-27088.json").await?;
assert!(matches!(Format::from_bytes(&cve), Ok(Format::CVE)));

Expand Down
149 changes: 111 additions & 38 deletions modules/ingestor/tests/reingest/osv.rs
Original file line number Diff line number Diff line change
@@ -1,62 +1,135 @@
#![allow(clippy::expect_used)]

use anyhow::bail;
use std::future::Future;
use test_context::test_context;
use test_log::test;
use trustify_common::id::Id;
use trustify_module_ingestor::model::IngestResult;
use trustify_module_ingestor::service::Format;
use trustify_test_context::TrustifyContext;

#[test_context(TrustifyContext)]
#[test(tokio::test)]
async fn reingest(ctx: &TrustifyContext) -> Result<(), anyhow::Error> {
async fn assert(ctx: &TrustifyContext, result: IngestResult) -> anyhow::Result<()> {
let Id::Uuid(id) = result.id else {
bail!("must be an id")
};
let adv = ctx
.graph
.get_advisory_by_id(id, ())
.await?
.expect("must be found");

assert_eq!(adv.vulnerabilities(()).await?.len(), 1);

let all = adv.vulnerabilities(&()).await?;
assert_eq!(all.len(), 1);
assert_eq!(
all[0].advisory_vulnerability.vulnerability_id,
"CVE-2021-32714"
);

let all = ctx.graph.get_vulnerabilities(()).await?;
assert_eq!(all.len(), 1);

let vuln = ctx
.graph
.get_vulnerability("CVE-2021-32714", ())
.await?
.expect("Must be found");

assert_eq!(vuln.vulnerability.id, "CVE-2021-32714");

let descriptions = vuln.descriptions("en", ()).await?;
assert_eq!(descriptions.len(), 0);

Ok(())
}
async fn reingest_json(ctx: &TrustifyContext) -> Result<(), anyhow::Error> {
reingest(
ctx,
"osv/RUSTSEC-2021-0079.json",
Format::OSV,
|ctx, result| async move {
assert_common(ctx, &result, "CVE-2021-32714").await?;
Ok(())
},
)
.await
}

/// Ingest the JSON OSV document twice, declared as [`Format::Unknown`].
///
/// After each ingestion the result is checked with `assert_common`, expecting the
/// advisory to reference `CVE-2021-32714`.
#[test_context(TrustifyContext)]
#[test(tokio::test)]
async fn reingest_json_unknown(ctx: &TrustifyContext) -> Result<(), anyhow::Error> {
    const DOCUMENT: &str = "osv/RUSTSEC-2021-0079.json";
    const EXPECTED: &str = "CVE-2021-32714";

    reingest(ctx, DOCUMENT, Format::Unknown, |ctx, result| async move {
        assert_common(ctx, &result, EXPECTED).await
    })
    .await
}

/// Ingest the YAML OSV document twice, explicitly declared as [`Format::OSV`].
///
/// After each ingestion the result is checked with `assert_common`, expecting the
/// advisory to reference `CVE-2020-5238`.
#[test_context(TrustifyContext)]
#[test(tokio::test)]
async fn reingest_yaml(ctx: &TrustifyContext) -> Result<(), anyhow::Error> {
    const DOCUMENT: &str = "osv/RSEC-2023-6.yaml";
    const EXPECTED: &str = "CVE-2020-5238";

    reingest(ctx, DOCUMENT, Format::OSV, |ctx, result| async move {
        assert_common(ctx, &result, EXPECTED).await
    })
    .await
}

/// Ingest the YAML OSV document twice, declared as [`Format::Unknown`].
///
/// After each ingestion the result is checked with `assert_common`, expecting the
/// advisory to reference `CVE-2020-5238`.
#[test_context(TrustifyContext)]
#[test(tokio::test)]
async fn reingest_yaml_unknown(ctx: &TrustifyContext) -> Result<(), anyhow::Error> {
    const DOCUMENT: &str = "osv/RSEC-2023-6.yaml";
    const EXPECTED: &str = "CVE-2020-5238";

    reingest(ctx, DOCUMENT, Format::Unknown, |ctx, result| async move {
        assert_common(ctx, &result, EXPECTED).await
    })
    .await
}

/// Ingest `file` twice, declared as `format`, running `assert` on the result of each ingestion.
///
/// # Parameters
/// - `ctx`: the test context used for ingesting; also handed to `assert`.
/// - `file`: path of the test document, passed to `ingest_document_as`.
/// - `format`: the format the document is declared as on both ingestions.
/// - `assert`: check invoked with the context and the ingest result after each ingestion.
///
/// # Errors
/// Returns the first error raised by either ingestion or by `assert`.
async fn reingest<'a, F, Fut>(
    ctx: &'a TrustifyContext,
    file: &'static str,
    format: Format,
    assert: F,
) -> Result<(), anyhow::Error>
where
    F: Fn(&'a TrustifyContext, IngestResult) -> Fut + 'a,
    Fut: Future<Output = anyhow::Result<()>> + 'a,
{
    // ingest once

    let result = ctx.ingest_document_as(file, format).await?;
    assert(ctx, result).await?;

    // ingest second time, using the very same document and format

    let result = ctx.ingest_document_as(file, format).await?;
    assert(ctx, result).await?;

    // done

    Ok(())
}

/// Shared checks for the state of the graph after ingesting a single OSV document.
///
/// Verifies that the ingest result carries a UUID id, that the advisory found under this
/// id references exactly one vulnerability with the id `expected_vuln_id`, that the graph
/// holds exactly one vulnerability overall, and that this vulnerability has no `en`
/// descriptions.
///
/// # Errors
/// Fails if the result id is not a UUID or if any graph query fails.
///
/// # Panics
/// Panics if the advisory or the vulnerability cannot be found, or if an assertion fails.
async fn assert_common(
    ctx: &TrustifyContext,
    result: &IngestResult,
    expected_vuln_id: &str,
) -> anyhow::Result<()> {
    let id = match result.id {
        Id::Uuid(id) => id,
        _ => bail!("must be an id"),
    };

    let advisory = ctx
        .graph
        .get_advisory_by_id(id, ())
        .await?
        .expect("must be found");

    let count = advisory.vulnerabilities(()).await?.len();
    assert_eq!(count, 1);

    let advisory_vulns = advisory.vulnerabilities(&()).await?;
    assert_eq!(advisory_vulns.len(), 1);
    assert_eq!(
        advisory_vulns[0].advisory_vulnerability.vulnerability_id,
        expected_vuln_id
    );

    // the graph as a whole must only know this one vulnerability
    let known = ctx.graph.get_vulnerabilities(()).await?;
    assert_eq!(known.len(), 1);

    let found = ctx
        .graph
        .get_vulnerability(expected_vuln_id, ())
        .await?
        .expect("Must be found");

    assert_eq!(found.vulnerability.id, expected_vuln_id);

    let en_descriptions = found.descriptions("en", ()).await?;
    assert_eq!(en_descriptions.len(), 0);

    Ok(())
}
11 changes: 10 additions & 1 deletion test-context/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,20 @@ impl TrustifyContext {
Ok(results)
}

/// Same as [`Self::ingest_document_as`], but with a format of [`Format::Unknown`].
pub async fn ingest_document(&self, path: &str) -> Result<IngestResult, anyhow::Error> {
self.ingest_document_as(path, Format::Unknown).await
}

/// Load the test document at `path` and ingest it, declared as `format`.
///
/// The document is labeled with `("source", "TrustifyContext")` and ingested without an
/// issuer (`None`).
///
/// # Errors
/// Fails if the document cannot be loaded or if the ingestion fails.
pub async fn ingest_document_as(
    &self,
    path: &str,
    format: Format,
) -> Result<IngestResult, anyhow::Error> {
    let bytes = document_bytes(path).await?;
    // `Ok(..?)` converts the ingestor's error into `anyhow::Error`
    Ok(self
        .ingestor
        .ingest(&bytes, format, ("source", "TrustifyContext"), None)
        .await?)
}

Expand Down

0 comments on commit ae0472f

Please sign in to comment.