From 283b45c66d2b152bd9350ee8d6fde78d915bdd06 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Tue, 21 May 2024 14:15:11 -0400 Subject: [PATCH] [profiling] Add fuzz test for StringTable (#437) --- .github/workflows/fuzz.yml | 25 ++++ Cargo.lock | 132 +++++++++++++++++- Cargo.toml | 7 + alloc/src/chain.rs | 3 + alloc/src/linear.rs | 4 + alloc/src/virtual.rs | 4 + profiling/Cargo.toml | 2 + profiling/src/collections/string_table/mod.rs | 77 ++++++++++ 8 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/fuzz.yml diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 000000000..4d1da0211 --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,25 @@ +name: Fuzz test +on: + push: + +jobs: + run-fuzz: + runs-on: ubuntu-latest + env: + CARGO_TERM_COLOR: always + steps: + - uses: actions/checkout@v4 + - name: Set up Rust + run: | + set -e + rustup set profile minimal + rustup toolchain install nightly + rustup default nightly + - uses: taiki-e/install-action@v2 + with: + tool: cargo-bolero + - run: | + set -e + (cd profiling && cargo bolero test collections::string_table::tests::fuzz_string_table -T 1min) + (cd profiling && cargo bolero test collections::string_table::tests::fuzz_arena_allocator -T 1min) + diff --git a/Cargo.lock b/Cargo.lock index 60f7da9ea..0a7053b95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -304,6 +304,97 @@ dependencies = [ "memoffset 0.9.1", ] +[[package]] +name = "bolero" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "212e8dca6d4001cc6cac941d6932ddaa8cd27f57e5e44a9da19c913eb6a43b33" +dependencies = [ + "bolero-afl", + "bolero-engine", + "bolero-generator", + "bolero-honggfuzz", + "bolero-kani", + "bolero-libfuzzer", + "cfg-if", + "rand", +] + +[[package]] +name = "bolero-afl" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b34f05de1527425bb05287da09ff1ff1612538648824db49e16d9693b24065" +dependencies = [ + "bolero-engine", + "cc", +] + +[[package]] +name = "bolero-engine" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6206263ebdd42e093c1229dab3957f61c9fd68d73c00f238ae25a378778b6bd3" +dependencies = [ + "anyhow", + "backtrace", + "bolero-generator", + "lazy_static", + "pretty-hex", + "rand", +] + +[[package]] +name = "bolero-generator" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac749fb4f2e14734e835a9352c0d1eb2ab62a025d4c56a823fa3f391e015741a" +dependencies = [ + "bolero-generator-derive", + "either", + "rand_core", +] + +[[package]] +name = "bolero-generator-derive" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53397bfda19ccb48527faa14025048fc4bb76f090ccdeef1e5a355bfe4a94467" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "bolero-honggfuzz" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf78581db1a7263620a8767e645b93ad287c70122ae76f5bd67040c7f06ff8e3" +dependencies = [ + "bolero-engine", +] + +[[package]] +name = "bolero-kani" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e55cec272a617f5ae4ce670db035108eb97c10cd4f67de851a3c8d3f18f19cb" +dependencies = [ + "bolero-engine", +] + +[[package]] +name = "bolero-libfuzzer" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb42f66ee3ec89b9c411994de59d4710ced19df96fea2059feea1c2d73904c5b" +dependencies = [ + "bolero-engine", + "cc", +] + [[package]] name = "build_common" version = "9.0.0" @@ -827,6 +918,7 @@ version = "9.0.0" dependencies = [ "anyhow", "bitmaps", + "bolero", "byteorder", "bytes", "chrono", @@ -2419,6 +2511,12 @@ dependencies = [ "nix 0.28.0", ] +[[package]] +name = "pretty-hex" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6fa0831dd7cc608c38a5e323422a0077678fa5744aa2be4ad91c4ece8eec8d5" + [[package]] name = "pretty_assertions" version = "1.4.0" @@ -2449,6 +2547,16 @@ dependencies = [ "indexmap 1.9.3", ] +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit 0.19.15", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -3574,7 +3682,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit", + "toml_edit 0.22.9", ] [[package]] @@ -3586,6 +3694,17 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap 2.2.6", + "toml_datetime", + "winnow 0.5.40", +] + [[package]] name = "toml_edit" version = "0.22.9" @@ -3596,7 +3715,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "winnow", + "winnow 0.6.6", ] [[package]] @@ -4246,6 +4365,15 @@ version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + [[package]] name = "winnow" version = "0.6.6" diff --git a/Cargo.toml b/Cargo.toml index e3f6d30bd..11080b6df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,3 +53,10 @@ opt-level = "s" # optimize for size [profile.release.package.datadog-serverless-trace-mini-agent] strip = true + +# https://camshaft.github.io/bolero/library-installation.html +[profile.fuzz] +inherits = "dev" +opt-level = 3 +incremental = false +codegen-units = 1 diff --git a/alloc/src/chain.rs b/alloc/src/chain.rs index 7defb6e17..91fd866e1 100644 --- a/alloc/src/chain.rs +++ b/alloc/src/chain.rs @@ -214,6 +214,9 @@ impl ChainAllocator { unsafe impl Allocator for ChainAllocator { #[cfg_attr(debug_assertions, track_caller)] fn allocate(&self, layout: Layout) -> Result, AllocError> { + if layout.size() == 0 { + return Err(AllocError); + } let layout = layout.pad_to_align(); let remaining_capacity = self.remaining_capacity(); diff --git a/alloc/src/linear.rs b/alloc/src/linear.rs index 15d9f67d1..b42d6e73a 100644 --- a/alloc/src/linear.rs +++ b/alloc/src/linear.rs @@ -78,6 +78,10 @@ impl Drop for LinearAllocator { unsafe impl Allocator for LinearAllocator { fn allocate(&self, layout: Layout) -> Result, AllocError> { + if layout.size() == 0 { + return Err(AllocError); + } + // Find the needed allocation size including the necessary alignment. let size = self.used_bytes(); // SAFETY: base_ptr + size will always be in the allocated range, or diff --git a/alloc/src/virtual.rs b/alloc/src/virtual.rs index f6a38b0b2..5b94cb404 100644 --- a/alloc/src/virtual.rs +++ b/alloc/src/virtual.rs @@ -94,6 +94,10 @@ pub mod os { } fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> { + if layout.size() == 0 { + return Err(AllocError); + } + let size = super::layout_to_page_size(layout)?; let null = ptr::null_mut(); diff --git a/profiling/Cargo.toml b/profiling/Cargo.toml index 570c9a775..e1ee03104 100644 --- a/profiling/Cargo.toml +++ b/profiling/Cargo.toml @@ -46,4 +46,6 @@ tokio-util = "0.7.1" byteorder = { version = "1.5", features = ["std"] } [dev-dependencies] +bolero = "0.10.1" criterion = "0.5.1" + diff --git a/profiling/src/collections/string_table/mod.rs b/profiling/src/collections/string_table/mod.rs index e47f33c20..0c785417e 100644 --- a/profiling/src/collections/string_table/mod.rs +++ b/profiling/src/collections/string_table/mod.rs @@ -15,6 +15,11 @@ use std::alloc::Layout; pub trait ArenaAllocator: Allocator { /// Copies the str into the arena, and returns a slice to the new str. fn allocate(&self, str: &str) -> Result<&str, AllocError> { + // TODO: We might want each allocator to return its own empty string + // so we can debug where the value came from. + if str.is_empty() { + return Ok(""); + } let layout = Layout::for_value(str); let uninit_ptr = Allocator::allocate(self, layout)?; @@ -202,6 +207,78 @@ impl IntoLendingIterator for StringTable { mod tests { use super::*; + #[test] + fn fuzz_arena_allocator() { + bolero::check!() + .with_type::<(usize, Vec)>() + .for_each(|(size_hint, strings)| { + // If the size_hint is insanely large, get allowed allocation + // failures. These are not interesting, so avoid them. + if *size_hint > 4 * 1024 * 1024 * 1024 { + return; + } + let bytes = ChainAllocator::new_in(*size_hint, VirtualAllocator {}); + let mut allocated_strings = vec![]; + for string in strings { + let s = + ArenaAllocator::allocate(&bytes, string).expect("allocation to succeed"); + assert_eq!(s, string); + allocated_strings.push(s); + } + assert_eq!(strings.len(), allocated_strings.len()); + strings + .iter() + .zip(allocated_strings.iter()) + .for_each(|(s, t)| assert_eq!(s, t)); + }); + } + + /// This is a fuzz test for the allocation optimized `StringTable`. + /// It checks both safety (lack of crashes / sanitizer failures), + /// as well as functional correctness (the table should behave like an + /// ordered set). + /// Limitations: + /// - The crate used here to generate Strings internally has a default range for the length of + /// a string, (0..=64) We should experiment with longer strings to see what happens. https://github.com/camshaft/bolero/blob/f401669697ffcbe7f34cbfd09fd57b93d5df734c/lib/bolero-generator/src/alloc/mod.rs#L17 + /// - Since iterating is destructive, can only check the string values once. + /// `cargo +nightly bolero test collections::string_table::tests::fuzz_string_table -T 1min` + #[test] + fn fuzz_string_table() { + bolero::check!() + .with_type::>() + .for_each(|strings| { + // Compare our optimized implementation against a "golden" version + // from the standard library. + let mut golden_list = vec![""]; + let mut golden_set = std::collections::HashSet::from([""]); + let mut st = StringTable::new(); + + for string in strings { + assert_eq!(st.len(), golden_set.len()); + if golden_set.insert(string) { + golden_list.push(string); + } + + let str_id = st.intern(string); + // The str_id should refer to the id_th string interned + // on the list. We can't look inside the `StringTable` + // in a non-desctrive way, but fortunately we have the + // `golden_list` to compare against. + assert_eq!(string, golden_list[str_id.to_offset()]); + } + assert_eq!(st.len(), golden_list.len()); + assert_eq!(st.len(), golden_set.len()); + + // Check that the strings remain in order + let mut it = st.into_lending_iter(); + let mut idx = 0; + while let Some(s) = it.next() { + assert_eq!(s, golden_list[idx]); + idx += 1; + } + }) + } + #[test] fn test_basics() { let mut table = StringTable::new();