Skip to content

Commit

Permalink
Add fuzzing for Take and SearchSorted functions (#724)
Browse files Browse the repository at this point in the history
  • Loading branch information
robert3005 authored Sep 5, 2024
1 parent 5682fee commit 36c33bd
Show file tree
Hide file tree
Showing 12 changed files with 247 additions and 78 deletions.
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ cargo-fuzz = true
libfuzzer-sys = { workspace = true }
vortex-array = { workspace = true, features = ["arbitrary"] }
vortex-dtype = { workspace = true }
vortex-sampling-compressor = { workspace = true }
vortex-scalar = { workspace = true }
vortex-error = { workspace = true }
vortex-sampling-compressor = { workspace = true, features = ["arbitrary"] }
vortex-scalar = { workspace = true, features = ["arbitrary"] }

[lib]
name = "vortex_fuzz"
Expand Down
78 changes: 64 additions & 14 deletions fuzz/fuzz_targets/fuzz_target_1.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,19 @@
#![no_main]

use std::collections::HashSet;

use libfuzzer_sys::{fuzz_target, Corpus};
use vortex::compute::slice;
use vortex::compute::unary::scalar_at;
use vortex::compute::{search_sorted, slice, take, SearchResult, SearchSorted, SearchSortedSide};
use vortex::encoding::EncodingId;
use vortex::Array;
use vortex_error::VortexResult;
use vortex_fuzz::{Action, FuzzArrayAction};
use vortex_sampling_compressor::compressors::CompressorRef;
use vortex_sampling_compressor::SamplingCompressor;
use vortex_scalar::{PValue, Scalar, ScalarValue};

fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus {
let FuzzArrayAction { array, actions } = fuzz_action;

// TODO(adamg): We actually might want to test empty things, but I'm punting this issue for now
if array.is_empty() {
return Corpus::Reject;
};

match &actions[0] {
Action::Compress(c) => match fuzz_compress(&array, *c) {
Action::Compress(c) => match fuzz_compress(&array, c) {
Some(compressed_array) => {
assert_array_eq(&array, &compressed_array);
Corpus::Keep
Expand All @@ -33,19 +25,61 @@ fuzz_target!(|fuzz_action: FuzzArrayAction| -> Corpus {
assert_slice(&array, &slice, range.start);
Corpus::Keep
}
Action::SearchSorted(s, side) => {
if !array_is_sorted(&array).unwrap() {
return Corpus::Reject;
}

let search_result = search_sorted(&array, s.clone(), *side).unwrap();
assert_search_sorted(&array, s, *side, search_result);
Corpus::Keep
}
Action::Take(indices) => {
if indices.is_empty() {
return Corpus::Reject;
}
let taken = take(&array, indices).unwrap();
assert_take(&array, &taken, indices);
Corpus::Keep
}
}
});

fn fuzz_compress(array: &Array, compressor_ref: CompressorRef<'_>) -> Option<Array> {
let ctx = SamplingCompressor::new(HashSet::from([compressor_ref]));
let compressed_array = ctx.compress(array, None).unwrap();
/// Compresses `array` with `compressor` and returns the compressed array, but only when the
/// compression result reports a path; otherwise returns `None`.
///
/// Panics if compression itself fails, which the fuzzer reports as a crash.
fn fuzz_compress(array: &Array, compressor: &SamplingCompressor) -> Option<Array> {
    let outcome = compressor.compress(array, None).unwrap();

    if outcome.path().is_some() {
        Some(outcome.into_array())
    } else {
        None
    }
}

/// Asserts that `search_result` equals what the `SearchSorted` trait method returns when invoked
/// directly on `original` for the same `value` and `side`.
///
/// On mismatch the panic message names the searched value, the array's encoding id and the side.
fn assert_search_sorted(
    original: &Array,
    value: &Scalar,
    side: SearchSortedSide,
    search_result: SearchResult,
) {
    let expected = SearchSorted::search_sorted(original, value, side);
    assert_eq!(
        expected,
        search_result,
        "Searching for {value} in {} from {side}",
        original.encoding().id()
    );
}

/// Asserts that `taken` has one element per entry of `indices` and that each element matches the
/// element of `original` at the position named by the corresponding index.
///
/// Panics if any scalar lookup fails or if an index scalar cannot be converted to `usize`.
fn assert_take(original: &Array, taken: &Array, indices: &Array) {
    assert_eq!(taken.len(), indices.len());
    (0..indices.len()).for_each(|position| {
        // Resolve the index stored at `position` into a position inside `original`.
        let index_scalar = scalar_at(indices, position).unwrap();
        let source_position = usize::try_from(&index_scalar).unwrap();

        let expected = scalar_at(original, source_position).unwrap();
        let actual = scalar_at(taken, position).unwrap();

        fuzzing_scalar_cmp(
            expected,
            actual,
            original.encoding().id(),
            taken.encoding().id(),
            position,
        );
    });
}

fn assert_slice(original: &Array, slice: &Array, start: usize) {
for idx in 0..slice.len() {
let o = scalar_at(original, start + idx).unwrap();
Expand Down Expand Up @@ -93,3 +127,19 @@ fn fuzzing_scalar_cmp(
);
assert_eq!(l.is_valid(), r.is_valid());
}

/// Reports whether the scalars of `array` are in non-descending order.
///
/// An empty array counts as sorted. Elements are read one at a time with `scalar_at`; the first
/// lookup error is propagated, and the scan stops at the first element that compares less than
/// its predecessor.
fn array_is_sorted(array: &Array) -> VortexResult<bool> {
    if array.is_empty() {
        return Ok(true);
    }

    let mut previous = scalar_at(array, 0)?;
    // The mapped range is lazy, so elements are still fetched strictly in order and only as
    // far as needed.
    for fetched in (1..array.len()).map(|position| scalar_at(array, position)) {
        let current = fetched?;
        if current < previous {
            return Ok(false);
        }
        previous = current;
    }
    Ok(true)
}
95 changes: 50 additions & 45 deletions fuzz/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,67 +1,61 @@
use std::fmt::Debug;
use std::iter;
use std::ops::Range;

use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured};
use vortex::Array;
use vortex_sampling_compressor::compressors::alp::ALPCompressor;
use vortex_sampling_compressor::compressors::bitpacked::BitPackedCompressor;
use vortex_sampling_compressor::compressors::date_time_parts::DateTimePartsCompressor;
use vortex_sampling_compressor::compressors::dict::DictCompressor;
use vortex_sampling_compressor::compressors::r#for::FoRCompressor;
use vortex_sampling_compressor::compressors::roaring_bool::RoaringBoolCompressor;
use vortex_sampling_compressor::compressors::roaring_int::RoaringIntCompressor;
use vortex_sampling_compressor::compressors::runend::DEFAULT_RUN_END_COMPRESSOR;
use vortex_sampling_compressor::compressors::sparse::SparseCompressor;
use vortex_sampling_compressor::compressors::zigzag::ZigZagCompressor;
use vortex_sampling_compressor::compressors::EncodingCompressor;
use vortex::array::PrimitiveArray;
use vortex::compute::unary::scalar_at;
use vortex::compute::SearchSortedSide;
use vortex::{Array, ArrayDType};
use vortex_sampling_compressor::SamplingCompressor;
use vortex_scalar::arbitrary::random_scalar;
use vortex_scalar::Scalar;

/// A fuzzer input: an array together with the actions to exercise against it.
#[derive(Debug)]
pub struct FuzzArrayAction {
/// The array the actions are applied to.
pub array: Array,
/// The actions to run against `array`.
pub actions: Vec<Action>,
}

impl Debug for FuzzArrayAction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("FuzzArrayAction")
.field("action", &self.actions)
.field("array", &self.array)
.finish()
}
}

#[derive(Debug)]
pub enum Action {
Compress(&'static dyn EncodingCompressor),
Compress(SamplingCompressor<'static>),
Slice(Range<usize>),
}

impl Debug for Action {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Slice(arg0) => f.debug_tuple("Slice").field(arg0).finish(),
Self::Compress(c) => write!(f, "Compress({})", c.id()),
}
}
Take(Array),
SearchSorted(Scalar, SearchSortedSide),
}

impl<'a> Arbitrary<'a> for FuzzArrayAction {
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
let array = Array::arbitrary(u)?;
let action = match u.int_in_range(0..=10)? {
0 => {
let start = u.choose_index(array.len())?;
let stop = u.choose_index(array.len() - start)? + start;
let len = array.len();
let action = match u.int_in_range(0..=3)? {
0 => Action::Compress(u.arbitrary()?),
1 => {
let start = u.choose_index(len)?;
let stop = u.int_in_range(start..=len)?;
Action::Slice(start..stop)
}
1 => Action::Compress(&ALPCompressor),
2 => Action::Compress(&BitPackedCompressor),
3 => Action::Compress(&DictCompressor),
4 => Action::Compress(&FoRCompressor),
5 => Action::Compress(&RoaringBoolCompressor),
6 => Action::Compress(&RoaringIntCompressor),
7 => Action::Compress(&DEFAULT_RUN_END_COMPRESSOR),
8 => Action::Compress(&SparseCompressor),
9 => Action::Compress(&ZigZagCompressor),
10 => Action::Compress(&DateTimePartsCompressor),
2 => {
let indices = PrimitiveArray::from(random_vec_in_range(u, 0, len - 1)?).into();
let compressed = SamplingCompressor::default()
.compress(&indices, None)
.unwrap();
Action::Take(compressed.into_array())
}
3 => {
let side = if u.arbitrary()? {
SearchSortedSide::Left
} else {
SearchSortedSide::Right
};
if u.arbitrary()? {
let random_value_in_array = scalar_at(&array, u.choose_index(len)?).unwrap();
Action::SearchSorted(random_value_in_array, side)
} else {
Action::SearchSorted(random_scalar(u, array.dtype())?, side)
}
}
_ => unreachable!(),
};

Expand All @@ -71,3 +65,14 @@ impl<'a> Arbitrary<'a> for FuzzArrayAction {
})
}
}

/// Draws a vector of random length whose entries all lie in `min..=max`.
///
/// Before each element a boolean is drawn from `u`; generation stops as soon as that draw yields
/// `false` or fails. An error while drawing an element itself is returned to the caller.
fn random_vec_in_range(u: &mut Unstructured<'_>, min: usize, max: usize) -> Result<Vec<u64>> {
    let mut values = Vec::new();
    // A failed "continue?" draw is deliberately treated as "stop" rather than as an error.
    while u.arbitrary().unwrap_or(false) {
        values.push(u.int_in_range(min..=max)? as u64);
    }
    Ok(values)
}
9 changes: 7 additions & 2 deletions vortex-array/src/array/varbinview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,10 +257,15 @@ impl ArrayTrait for VarBinViewArray {}

impl IntoCanonical for VarBinViewArray {
fn into_canonical(self) -> VortexResult<Canonical> {
let arrow_dtype = if matches!(self.dtype(), &DType::Utf8(_)) {
&DataType::Utf8
} else {
&DataType::Binary
};
let nullable = self.dtype().is_nullable();
let arrow_self = as_arrow(self);
let arrow_varbin = arrow_cast::cast(arrow_self.deref(), &DataType::Utf8)
.map_err(VortexError::ArrowError)?;
let arrow_varbin =
arrow_cast::cast(arrow_self.deref(), arrow_dtype).map_err(VortexError::ArrowError)?;
let vortex_array = Array::from_arrow(arrow_varbin, nullable);

Ok(Canonical::VarBin(VarBinArray::try_from(&vortex_array)?))
Expand Down
10 changes: 10 additions & 0 deletions vortex-array/src/compute/search_sorted.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::cmp::Ordering;
use std::cmp::Ordering::{Equal, Greater, Less};
use std::fmt::{Display, Formatter};

use vortex_error::{vortex_bail, VortexResult};
use vortex_scalar::Scalar;
Expand All @@ -13,6 +14,15 @@ pub enum SearchSortedSide {
Right,
}

impl Display for SearchSortedSide {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
SearchSortedSide::Left => write!(f, "left"),
SearchSortedSide::Right => write!(f, "right"),
}
}
}

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum SearchResult {
Found(usize),
Expand Down
5 changes: 5 additions & 0 deletions vortex-sampling-compressor/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ rust-version = { workspace = true }

[dependencies]
fsst-rs = { workspace = true }
arbitrary = { workspace = true, optional = true }
lazy_static = { workspace = true }
log = { workspace = true }
rand = { workspace = true }
vortex-alp = { workspace = true }
Expand All @@ -33,3 +35,6 @@ chrono = { workspace = true }

[lints]
workspace = true

[features]
arbitrary = ["dep:arbitrary"]
19 changes: 19 additions & 0 deletions vortex-sampling-compressor/src/arbitrary.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use std::collections::HashSet;

use arbitrary::{Arbitrary, Result, Unstructured};

use crate::compressors::{CompressorRef, EncodingCompressor};
use crate::{SamplingCompressor, ALL_COMPRESSORS};

impl<'a, 'b: 'a> Arbitrary<'a> for SamplingCompressor<'b> {
    /// Builds a compressor from an arbitrary set of compressor references drawn from `u`.
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
        u.arbitrary::<HashSet<CompressorRef>>().map(Self::new)
    }
}

impl<'a, 'b: 'a> Arbitrary<'a> for &'b dyn EncodingCompressor {
    /// Picks one entry of `ALL_COMPRESSORS`, using `u` to choose which.
    ///
    /// Returns whatever error `Unstructured::choose` reports if no choice can be made.
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
        // Borrow the shared table in place; cloning it on every call only to take a reference
        // into the temporary copy is wasted work. The entries are plain references, so the
        // chosen one is copied out.
        u.choose(ALL_COMPRESSORS.as_slice()).copied()
    }
}
Loading

0 comments on commit 36c33bd

Please sign in to comment.