From 47b22833b4e226b8670881314d995f2e5ec80546 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 15 Aug 2024 17:41:59 +0100 Subject: [PATCH 1/4] Initial work --- fuzz/Cargo.toml | 8 +++- fuzz/fuzz_targets/fuzz_target_1.rs | 74 +++++++----------------------- fuzz/src/lib.rs | 70 ++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 60 deletions(-) create mode 100644 fuzz/src/lib.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 522c25107f..166ff054f0 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -2,19 +2,23 @@ name = "vortex-fuzz" version = "0.0.0" publish = false -edition = "2021" +edition = { workspace = true } license = { workspace = true } [package.metadata] cargo-fuzz = true [dependencies] -libfuzzer-sys = "0.4" +libfuzzer-sys = { version = "0.4" } vortex-array = { workspace = true, features = ["arbitrary"] } vortex-dtype = { workspace = true } vortex-sampling-compressor = { workspace = true } vortex-scalar = { workspace = true } +[lib] +name = "vortex_fuzz" +path = "src/lib.rs" + [[bin]] name = "fuzz_target_1" diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs index 2bc10f09dd..075bb5c0c0 100644 --- a/fuzz/fuzz_targets/fuzz_target_1.rs +++ b/fuzz/fuzz_targets/fuzz_target_1.rs @@ -2,81 +2,39 @@ use std::collections::HashSet; -use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured}; use libfuzzer_sys::{fuzz_target, Corpus}; use vortex::compute::slice; use vortex::compute::unary::scalar_at; use vortex::encoding::EncodingId; use vortex::Array; -use vortex_sampling_compressor::compressors::alp::ALPCompressor; -use vortex_sampling_compressor::compressors::bitpacked::BitPackedCompressor; -use vortex_sampling_compressor::compressors::dict::DictCompressor; -use vortex_sampling_compressor::compressors::r#for::FoRCompressor; -use vortex_sampling_compressor::compressors::roaring_bool::RoaringBoolCompressor; -use vortex_sampling_compressor::compressors::roaring_int::RoaringIntCompressor; -use vortex_sampling_compressor::compressors::runend::DEFAULT_RUN_END_COMPRESSOR; -use vortex_sampling_compressor::compressors::sparse::SparseCompressor; -use vortex_sampling_compressor::compressors::zigzag::ZigZagCompressor; +use vortex_fuzz::FuzzArrayAction; use vortex_sampling_compressor::compressors::CompressorRef; use vortex_sampling_compressor::SamplingCompressor; use vortex_scalar::{PValue, Scalar, ScalarValue}; -fuzz_target!(|data: &[u8]| -> Corpus { - let mut u = Unstructured::new(data); - - let array = Array::arbitrary(&mut u).unwrap(); +fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus { + let FuzzArrayAction { array, action } = fuzz_action; // TODO(adamg): We actually might want to test empty things, but I'm punting this issue for now if array.is_empty() { return Corpus::Reject; }; - match u.int_in_range(0..=9).unwrap() { - 0 => { - let start = u.choose_index(array.len()).unwrap(); - let stop = u.choose_index(array.len() - start).unwrap() + start; - let slice = slice(&array, start, stop).unwrap(); - assert_slice(&array, &slice, start); - } - 1 => match fuzz_compress(&array, &ALPCompressor) { - Some(compressed_array) => assert_array_eq(&array, &compressed_array), - None => return Corpus::Reject, - }, - 2 => match fuzz_compress(&array, &BitPackedCompressor) { - Some(compressed_array) => assert_array_eq(&array, &compressed_array), - None => return Corpus::Reject, - }, - 3 => match fuzz_compress(&array, &DictCompressor) { - Some(compressed_array) => assert_array_eq(&array, &compressed_array), - None => return Corpus::Reject, - }, - 4 => match fuzz_compress(&array, &FoRCompressor) { - Some(compressed_array) => assert_array_eq(&array, &compressed_array), - None => return Corpus::Reject, - }, - 5 => match fuzz_compress(&array, &RoaringBoolCompressor) { - Some(compressed_array) => assert_array_eq(&array, &compressed_array), - None => return Corpus::Reject, - }, - 6 => match fuzz_compress(&array, &RoaringIntCompressor) { - Some(compressed_array) => assert_array_eq(&array, &compressed_array), - None => return Corpus::Reject, - }, - 7 => match fuzz_compress(&array, &DEFAULT_RUN_END_COMPRESSOR) { - Some(compressed_array) => assert_array_eq(&array, &compressed_array), - None => return Corpus::Reject, - }, - 8 => match fuzz_compress(&array, &SparseCompressor) { - Some(compressed_array) => assert_array_eq(&array, &compressed_array), - None => return Corpus::Reject, - }, - 9 => match fuzz_compress(&array, &ZigZagCompressor) { - Some(compressed_array) => assert_array_eq(&array, &compressed_array), + + match action { + vortex_fuzz::Action::Compress(c) => match fuzz_compress(&array, c.as_ref()) { + Some(compressed_array) => { + assert_array_eq(&array, &compressed_array); + Corpus::Keep + } None => return Corpus::Reject, }, - _ => unreachable!(), + vortex_fuzz::Action::Slice(range) => { + let slice = slice(&array, range.start, range.end).unwrap(); + assert_slice(&array, &slice, range.start); + Corpus::Keep + } + vortex_fuzz::Action::NoOp => Corpus::Reject, } - - Corpus::Keep }); fn fuzz_compress(array: &Array, compressor_ref: CompressorRef<'_>) -> Option { diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs new file mode 100644 index 0000000000..1545c4f104 --- /dev/null +++ b/fuzz/src/lib.rs @@ -0,0 +1,70 @@ +use std::ops::Range; + +use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured}; +use vortex::Array; +use vortex_sampling_compressor::compressors::alp::ALPCompressor; +use vortex_sampling_compressor::compressors::bitpacked::BitPackedCompressor; +use vortex_sampling_compressor::compressors::dict::DictCompressor; +use vortex_sampling_compressor::compressors::r#for::FoRCompressor; +use vortex_sampling_compressor::compressors::roaring_bool::RoaringBoolCompressor; +use vortex_sampling_compressor::compressors::roaring_int::RoaringIntCompressor; +use vortex_sampling_compressor::compressors::runend::DEFAULT_RUN_END_COMPRESSOR; +use vortex_sampling_compressor::compressors::sparse::SparseCompressor; +use vortex_sampling_compressor::compressors::zigzag::ZigZagCompressor; +use vortex_sampling_compressor::compressors::EncodingCompressor; + +pub struct FuzzArrayAction { + pub array: Array, + pub action: Action, +} + +impl std::fmt::Debug for FuzzArrayAction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("FuzzArrayAction") + .field("action", &self.action) + .field("array", &self.array) + .finish() + } +} + +#[derive()] +pub enum Action { + NoOp, + Compress(Box), + Slice(Range), +} + +impl std::fmt::Debug for Action { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NoOp => write!(f, "NoOp"), + Self::Slice(arg0) => f.debug_tuple("Slice").field(arg0).finish(), + Self::Compress(c) => write!(f, "Compress({})", c.id()), + } + } +} + +impl<'a> Arbitrary<'a> for FuzzArrayAction { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + let array = Array::arbitrary(u)?; + let action = match u.int_in_range(0..=9)? { + 0 => { + let start = u.choose_index(array.len())?; + let stop = u.choose_index(array.len() - start).unwrap() + start; + Action::Slice(start..stop) + } + 1 => Action::Compress(Box::new(ALPCompressor) as _), + 2 => Action::Compress(Box::new(BitPackedCompressor) as _), + 3 => Action::Compress(Box::new(DictCompressor) as _), + 4 => Action::Compress(Box::new(FoRCompressor) as _), + 5 => Action::Compress(Box::new(RoaringBoolCompressor) as _), + 6 => Action::Compress(Box::new(RoaringIntCompressor) as _), + 7 => Action::Compress(Box::new(DEFAULT_RUN_END_COMPRESSOR) as _), + 8 => Action::Compress(Box::new(SparseCompressor) as _), + 9 => Action::Compress(Box::new(ZigZagCompressor) as _), + _ => Action::NoOp, + }; + + Ok(Self { array, action }) + } +} From eeffdbaa277251fb720c7965c3a37130f58d2b75 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 15 Aug 2024 17:44:04 +0100 Subject: [PATCH 2/4] . --- fuzz/fuzz_targets/fuzz_target_1.rs | 8 ++++---- fuzz/src/lib.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs index 075bb5c0c0..39ed63f9b2 100644 --- a/fuzz/fuzz_targets/fuzz_target_1.rs +++ b/fuzz/fuzz_targets/fuzz_target_1.rs @@ -7,7 +7,7 @@ use vortex::compute::slice; use vortex::compute::unary::scalar_at; use vortex::encoding::EncodingId; use vortex::Array; -use vortex_fuzz::FuzzArrayAction; +use vortex_fuzz::{Action, FuzzArrayAction}; use vortex_sampling_compressor::compressors::CompressorRef; use vortex_sampling_compressor::SamplingCompressor; use vortex_scalar::{PValue, Scalar, ScalarValue}; @@ -21,19 +21,19 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus { }; match action { - vortex_fuzz::Action::Compress(c) => match fuzz_compress(&array, c.as_ref()) { + Action::Compress(c) => match fuzz_compress(&array, c.as_ref()) { Some(compressed_array) => { assert_array_eq(&array, &compressed_array); Corpus::Keep } None => return Corpus::Reject, }, - vortex_fuzz::Action::Slice(range) => { + Action::Slice(range) => { let slice = slice(&array, range.start, range.end).unwrap(); assert_slice(&array, &slice, range.start); Corpus::Keep } - vortex_fuzz::Action::NoOp => Corpus::Reject, + Action::NoOp => Corpus::Reject, } }); diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index 1545c4f104..2a9ad72d9f 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -50,7 +50,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction { let action = match u.int_in_range(0..=9)? { 0 => { let start = u.choose_index(array.len())?; - let stop = u.choose_index(array.len() - start).unwrap() + start; + let stop = u.choose_index(array.len() - start)? + start; Action::Slice(start..stop) } 1 => Action::Compress(Box::new(ALPCompressor) as _), From 7eeb4edfbba4438b2be66a1d5799254156d4b136 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 15 Aug 2024 18:00:01 +0100 Subject: [PATCH 3/4] CR comments --- Cargo.toml | 1 + bench-vortex/Cargo.toml | 2 +- fuzz/Cargo.toml | 3 +-- fuzz/fuzz_targets/fuzz_target_1.rs | 6 +++--- fuzz/src/lib.rs | 29 ++++++++++++++++------------- vortex-expr/Cargo.toml | 2 +- 6 files changed, 23 insertions(+), 20 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 26b4f2b8ef..c67596b6cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ edition = "2021" rust-version = "1.76" [workspace.dependencies] +libfuzzer-sys = "0.4" ahash = "0.8.11" allocator-api2 = "0.2.16" anyhow = "1.0" diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index ad1fdc34b2..dad389f850 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -22,8 +22,8 @@ arrow-schema = { workspace = true } arrow-select = { workspace = true } bytes = { workspace = true } bzip2 = { workspace = true } -csv = { workspace = true } clap = { workspace = true, features = ["derive"] } +csv = { workspace = true } datafusion = { workspace = true } enum-iterator = { workspace = true } flexbuffers = { workspace = true } diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 166ff054f0..8df80c65fa 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -9,7 +9,7 @@ license = { workspace = true } cargo-fuzz = true [dependencies] -libfuzzer-sys = { version = "0.4" } +libfuzzer-sys = { workspace = true } vortex-array = { workspace = true, features = ["arbitrary"] } vortex-dtype = { workspace = true } vortex-sampling-compressor = { workspace = true } @@ -19,7 +19,6 @@ vortex-scalar = { workspace = true } name = "vortex_fuzz" path = "src/lib.rs" - [[bin]] name = "fuzz_target_1" path = "fuzz_targets/fuzz_target_1.rs" diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs index 39ed63f9b2..3108404b0d 100644 --- a/fuzz/fuzz_targets/fuzz_target_1.rs +++ b/fuzz/fuzz_targets/fuzz_target_1.rs @@ -13,15 +13,15 @@ use vortex_sampling_compressor::SamplingCompressor; use vortex_scalar::{PValue, Scalar, ScalarValue}; fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus { - let FuzzArrayAction { array, action } = fuzz_action; + let FuzzArrayAction { array, actions } = fuzz_action; // TODO(adamg): We actually might want to test empty things, but I'm punting this issue for now if array.is_empty() { return Corpus::Reject; }; - match action { - Action::Compress(c) => match fuzz_compress(&array, c.as_ref()) { + match &actions[0] { + Action::Compress(c) => match fuzz_compress(&array, *c) { Some(compressed_array) => { assert_array_eq(&array, &compressed_array); Corpus::Keep diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index 2a9ad72d9f..0c20323a14 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -15,13 +15,13 @@ use vortex_sampling_compressor::compressors::EncodingCompressor; pub struct FuzzArrayAction { pub array: Array, - pub action: Action, + pub actions: Vec, } impl std::fmt::Debug for FuzzArrayAction { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("FuzzArrayAction") - .field("action", &self.action) + .field("action", &self.actions) .field("array", &self.array) .finish() } @@ -30,7 +30,7 @@ impl std::fmt::Debug for FuzzArrayAction { #[derive()] pub enum Action { NoOp, - Compress(Box), + Compress(&'static dyn EncodingCompressor), Slice(Range), } @@ -53,18 +53,21 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction { let stop = u.choose_index(array.len() - start)? + start; Action::Slice(start..stop) } - 1 => Action::Compress(Box::new(ALPCompressor) as _), - 2 => Action::Compress(Box::new(BitPackedCompressor) as _), - 3 => Action::Compress(Box::new(DictCompressor) as _), - 4 => Action::Compress(Box::new(FoRCompressor) as _), - 5 => Action::Compress(Box::new(RoaringBoolCompressor) as _), - 6 => Action::Compress(Box::new(RoaringIntCompressor) as _), - 7 => Action::Compress(Box::new(DEFAULT_RUN_END_COMPRESSOR) as _), - 8 => Action::Compress(Box::new(SparseCompressor) as _), - 9 => Action::Compress(Box::new(ZigZagCompressor) as _), + 1 => Action::Compress(&ALPCompressor), + 2 => Action::Compress(&BitPackedCompressor), + 3 => Action::Compress(&DictCompressor), + 4 => Action::Compress(&FoRCompressor), + 5 => Action::Compress(&RoaringBoolCompressor), + 6 => Action::Compress(&RoaringIntCompressor), + 7 => Action::Compress(&DEFAULT_RUN_END_COMPRESSOR), + 8 => Action::Compress(&SparseCompressor), + 9 => Action::Compress(&ZigZagCompressor), _ => Action::NoOp, }; - Ok(Self { array, action }) + Ok(Self { + array, + actions: vec![action], + }) } } diff --git a/vortex-expr/Cargo.toml b/vortex-expr/Cargo.toml index b968b140c7..7f65424d3c 100644 --- a/vortex-expr/Cargo.toml +++ b/vortex-expr/Cargo.toml @@ -26,8 +26,8 @@ prost = { workspace = true, optional = true } serde = { workspace = true, optional = true, features = ["derive"] } vortex-dtype = { workspace = true } vortex-error = { workspace = true } -vortex-scalar = { workspace = true } vortex-proto = { workspace = true, optional = true } +vortex-scalar = { workspace = true } [features] datafusion = ["dep:datafusion-common", "dep:datafusion-expr", "vortex-scalar/datafusion"] From ef2dfcca135b119ff8fb466494ffd266e1905084 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 15 Aug 2024 18:02:29 +0100 Subject: [PATCH 4/4] . --- fuzz/fuzz_targets/fuzz_target_1.rs | 1 - fuzz/src/lib.rs | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs index 3108404b0d..7b88ef17a0 100644 --- a/fuzz/fuzz_targets/fuzz_target_1.rs +++ b/fuzz/fuzz_targets/fuzz_target_1.rs @@ -33,7 +33,6 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus { assert_slice(&array, &slice, range.start); Corpus::Keep } - Action::NoOp => Corpus::Reject, } }); diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index 0c20323a14..fed8e7df93 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -29,7 +29,6 @@ impl std::fmt::Debug for FuzzArrayAction { #[derive()] pub enum Action { - NoOp, Compress(&'static dyn EncodingCompressor), Slice(Range), } @@ -37,7 +36,6 @@ pub enum Action { impl std::fmt::Debug for Action { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::NoOp => write!(f, "NoOp"), Self::Slice(arg0) => f.debug_tuple("Slice").field(arg0).finish(), Self::Compress(c) => write!(f, "Compress({})", c.id()), } @@ -62,7 +60,7 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction { 7 => Action::Compress(&DEFAULT_RUN_END_COMPRESSOR), 8 => Action::Compress(&SparseCompressor), 9 => Action::Compress(&ZigZagCompressor), - _ => Action::NoOp, + _ => unreachable!(), }; Ok(Self {