Skip to content

Commit

Permalink
fix: SparseArray scalar_at was broken due to strict PValue PartialOrd (
Browse files Browse the repository at this point in the history
…#1575)

We made a change to use `search_sorted_usize`, which delegates to
`search_sorted` with a u64 scalar. But this scalar did not support
partial_cmp with e.g. a u8 array, and instead of failing we just
returned `Ordering::Less` 🤦
  • Loading branch information
gatesn authored Dec 5, 2024
1 parent 22c4c5e commit ff8cd91
Show file tree
Hide file tree
Showing 25 changed files with 142 additions and 70 deletions.
12 changes: 9 additions & 3 deletions vortex-array/src/array/constant/compute/search_sorted.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use std::cmp::Ordering;

use vortex_error::VortexResult;
use vortex_error::{vortex_err, VortexResult};
use vortex_scalar::Scalar;

use crate::array::{ConstantArray, ConstantEncoding};
use crate::compute::{SearchResult, SearchSortedFn, SearchSortedSide};
use crate::ArrayLen;
use crate::{ArrayDType, ArrayLen};

impl SearchSortedFn<ConstantArray> for ConstantEncoding {
fn search_sorted(
Expand All @@ -14,7 +14,13 @@ impl SearchSortedFn<ConstantArray> for ConstantEncoding {
value: &Scalar,
side: SearchSortedSide,
) -> VortexResult<SearchResult> {
match array.scalar().partial_cmp(value).unwrap_or(Ordering::Less) {
match array.scalar().partial_cmp(value).ok_or_else(|| {
vortex_err!(
"Cannot search sorted array type {} with value type {}",
array.dtype(),
value.dtype()
)
})? {
Ordering::Greater => Ok(SearchResult::NotFound(0)),
Ordering::Less => Ok(SearchResult::NotFound(array.len())),
Ordering::Equal => match side {
Expand Down
23 changes: 22 additions & 1 deletion vortex-array/src/array/sparse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,9 +247,10 @@ mod test {
use vortex_dtype::Nullability::Nullable;
use vortex_dtype::{DType, PType};
use vortex_error::VortexError;
use vortex_scalar::Scalar;
use vortex_scalar::{PrimitiveScalar, Scalar};

use crate::array::sparse::SparseArray;
use crate::array::ConstantArray;
use crate::compute::{scalar_at, slice, try_cast};
use crate::validity::ArrayValidity;
use crate::{ArrayData, IntoArrayData, IntoArrayVariant};
Expand Down Expand Up @@ -295,6 +296,26 @@ mod test {
assert_eq!(stop, 10);
}

#[test]
pub fn test_scalar_at_again() {
let arr = SparseArray::try_new(
ConstantArray::new(10u32, 1).into_array(),
ConstantArray::new(Scalar::primitive(1234u32, Nullable), 1).into_array(),
100,
Scalar::null(DType::Primitive(PType::U32, Nullable)),
)
.unwrap();

assert_eq!(
PrimitiveScalar::try_from(&scalar_at(&arr, 10).unwrap())
.unwrap()
.typed_value::<u32>(),
Some(1234)
);
assert!(scalar_at(&arr, 0).unwrap().is_null());
assert!(scalar_at(&arr, 99).unwrap().is_null());
}

#[test]
pub fn scalar_at_sliced() {
let sliced = slice(sparse_array(nullable_fill()), 2, 7).unwrap();
Expand Down
22 changes: 21 additions & 1 deletion vortex-array/src/compute/search_sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,12 @@ pub fn search_sorted<T: Into<Scalar>>(
target: T,
side: SearchSortedSide,
) -> VortexResult<SearchResult> {
let scalar = target.into().cast(array.dtype())?;
let Ok(scalar) = target.into().cast(array.dtype()) else {
// If the cast fails, then the search value must be higher than the highest value in
// the array.
return Ok(SearchResult::NotFound(array.len()));
};

if scalar.is_null() {
vortex_bail!("Search sorted with null value is not supported");
}
Expand Down Expand Up @@ -473,7 +478,11 @@ impl<T> Len for [T] {

#[cfg(test)]
mod test {
use crate::array::PrimitiveArray;
use crate::compute::search_sorted;
use crate::compute::search_sorted::{SearchResult, SearchSorted, SearchSortedSide};
use crate::validity::Validity;
use crate::IntoArrayData;

#[test]
fn left_side_equal() {
Expand Down Expand Up @@ -522,4 +531,15 @@ mod test {
assert_eq!(arr[res.to_index() - 1], 9);
assert_eq!(res, SearchResult::Found(13));
}

#[test]
fn failed_cast() {
let arr = PrimitiveArray::from_vec(
vec![0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 9],
Validity::NonNullable,
)
.into_array();
let res = search_sorted(&arr, 256, SearchSortedSide::Left).unwrap();
assert_eq!(res, SearchResult::NotFound(arr.len()));
}
}
3 changes: 1 addition & 2 deletions vortex-sampling-compressor/src/compressors/alp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use vortex_alp::{alp_encode_components, match_each_alp_float_ptype, ALPArray, AL
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::array::PrimitiveArray;
use vortex_array::encoding::{Encoding, EncodingRef};
use vortex_array::stats::ArrayStatistics;
use vortex_array::variants::PrimitiveArrayTrait;
use vortex_array::{ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_dtype::PType;
Expand Down Expand Up @@ -78,7 +77,7 @@ impl EncodingCompressor for ALPCompressor {
compressed_patches.and_then(|p| p.path),
],
)),
Some(array.statistics()),
array,
))
}

Expand Down
3 changes: 1 addition & 2 deletions vortex-sampling-compressor/src/compressors/alp_rd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use vortex_alp::{match_each_alp_float_ptype, ALPRDEncoding, RDEncoder as ALPRDEn
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::array::PrimitiveArray;
use vortex_array::encoding::{Encoding, EncodingRef};
use vortex_array::stats::ArrayStatistics;
use vortex_array::variants::PrimitiveArrayTrait;
use vortex_array::{ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_dtype::PType;
Expand Down Expand Up @@ -68,7 +67,7 @@ impl EncodingCompressor for ALPRDCompressor {
Ok(CompressedArray::compressed(
encoded,
Some(CompressionTree::new_with_metadata(self, vec![], encoder)),
Some(array.statistics()),
array,
))
}

Expand Down
2 changes: 1 addition & 1 deletion vortex-sampling-compressor/src/compressors/bitpacked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ impl EncodingCompressor for BitPackedCompressor {
self,
vec![patches.and_then(|p| p.path)],
)),
Some(array.statistics()),
array,
))
}

Expand Down
3 changes: 1 addition & 2 deletions vortex-sampling-compressor/src/compressors/chunked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use vortex_array::aliases::hash_set::HashSet;
use vortex_array::array::{ChunkedArray, ChunkedEncoding};
use vortex_array::compress::compute_precompression_stats;
use vortex_array::encoding::{Encoding, EncodingRef};
use vortex_array::stats::ArrayStatistics;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData};
use vortex_error::{vortex_bail, VortexExpect, VortexResult};

Expand Down Expand Up @@ -130,7 +129,7 @@ impl ChunkedCompressor {
compressed_trees,
Arc::new(ChunkedCompressorMetadata(ratio)),
)),
Some(array.statistics()),
array,
))
}
}
Expand Down
2 changes: 1 addition & 1 deletion vortex-sampling-compressor/src/compressors/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ impl EncodingCompressor for ConstantCompressor {
)
.into_array(),
Some(CompressionTree::flat(self)),
Some(array.statistics()),
array,
))
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::array::TemporalArray;
use vortex_array::encoding::{Encoding, EncodingRef};
use vortex_array::stats::ArrayStatistics;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData};
use vortex_datetime_dtype::TemporalMetadata;
use vortex_datetime_parts::{
Expand Down Expand Up @@ -73,7 +72,7 @@ impl EncodingCompressor for DateTimePartsCompressor {
self,
vec![days.path, seconds.path, subsecond.path],
)),
Some(array.statistics()),
array,
))
}

Expand Down
3 changes: 1 addition & 2 deletions vortex-sampling-compressor/src/compressors/delta.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::array::PrimitiveArray;
use vortex_array::encoding::{Encoding, EncodingRef};
use vortex_array::stats::ArrayStatistics;
use vortex_array::variants::PrimitiveArrayTrait;
use vortex_array::{ArrayData, IntoArrayData};
use vortex_error::VortexResult;
Expand Down Expand Up @@ -58,7 +57,7 @@ impl EncodingCompressor for DeltaCompressor {
DeltaArray::try_from_delta_compress_parts(bases.array, deltas.array, validity)?
.into_array(),
Some(CompressionTree::new(self, vec![bases.path, deltas.path])),
Some(array.statistics()),
array,
))
}

Expand Down
2 changes: 1 addition & 1 deletion vortex-sampling-compressor/src/compressors/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ impl EncodingCompressor for DictCompressor {
Ok(CompressedArray::compressed(
DictArray::try_new(codes.array, values.array)?.into_array(),
Some(CompressionTree::new(self, vec![codes.path, values.path])),
Some(array.statistics()),
array,
))
}

Expand Down
2 changes: 1 addition & 1 deletion vortex-sampling-compressor/src/compressors/for.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ impl EncodingCompressor for FoRCompressor {
)
.map(|a| a.into_array())?,
Some(CompressionTree::new(self, vec![compressed_child.path])),
Some(array.statistics()),
array,
))
}

Expand Down
3 changes: 1 addition & 2 deletions vortex-sampling-compressor/src/compressors/fsst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use fsst::Compressor;
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::array::{VarBinEncoding, VarBinViewEncoding};
use vortex_array::encoding::{Encoding, EncodingRef};
use vortex_array::stats::ArrayStatistics;
use vortex_array::{ArrayDType, IntoArrayData};
use vortex_dtype::DType;
use vortex_error::{vortex_bail, VortexResult};
Expand Down Expand Up @@ -128,7 +127,7 @@ impl EncodingCompressor for FSSTCompressor {
vec![None, None, compressed_codes.path, uncompressed_lengths.path],
compressor,
)),
Some(array.statistics()),
array,
))
}

Expand Down
3 changes: 1 addition & 2 deletions vortex-sampling-compressor/src/compressors/list.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::array::{ListArray, ListEncoding};
use vortex_array::encoding::{Encoding, EncodingRef};
use vortex_array::stats::ArrayStatistics;
use vortex_array::{ArrayData, IntoArrayData};
use vortex_error::VortexResult;

Expand Down Expand Up @@ -51,7 +50,7 @@ impl EncodingCompressor for ListCompressor {
self,
vec![compressed_elements.path, compressed_offsets.path, None],
)),
Some(array.statistics()),
array,
))
}

Expand Down
42 changes: 31 additions & 11 deletions vortex-sampling-compressor/src/compressors/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ use std::sync::Arc;
use itertools::{EitherOrBoth, Itertools};
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::encoding::EncodingRef;
use vortex_array::stats::{ArrayStatistics, Statistics};
use vortex_array::stats::ArrayStatistics;
use vortex_array::tree::TreeFormatter;
use vortex_array::ArrayData;
use vortex_array::{ArrayDType, ArrayData};
use vortex_error::{vortex_panic, VortexExpect, VortexResult};

use crate::SamplingCompressor;
Expand Down Expand Up @@ -209,17 +209,37 @@ impl<'a> CompressedArray<'a> {
}

pub fn compressed(
array: ArrayData,
compressed: ArrayData,
path: Option<CompressionTree<'a>>,
stats_to_inherit: Option<&dyn Statistics>,
uncompressed: impl AsRef<ArrayData>,
) -> Self {
if let Some(stats) = stats_to_inherit {
// eagerly compute uncompressed size in bytes at compression time, since it's
// too expensive to compute after compression
let _ = stats.compute_uncompressed_size_in_bytes();
array.inherit_statistics(stats);
}
let compressed = Self { array, path };
let uncompressed = uncompressed.as_ref();

// Sanity check the compressed array
assert_eq!(
compressed.len(),
uncompressed.len(),
"Compressed array {} has different length to uncompressed",
compressed.encoding().id(),
);
assert_eq!(
compressed.dtype(),
uncompressed.dtype(),
"Compressed array {} has different dtype to uncompressed",
compressed.encoding().id(),
);

// eagerly compute uncompressed size in bytes at compression time, since it's
// too expensive to compute after compression
let _ = uncompressed
.statistics()
.compute_uncompressed_size_in_bytes();
compressed.inherit_statistics(uncompressed.statistics());

let compressed = Self {
array: compressed,
path,
};
compressed.validate();
compressed
}
Expand Down
3 changes: 1 addition & 2 deletions vortex-sampling-compressor/src/compressors/roaring_bool.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::array::BoolEncoding;
use vortex_array::encoding::{Encoding, EncodingRef};
use vortex_array::stats::ArrayStatistics;
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_dtype::DType;
use vortex_dtype::Nullability::NonNullable;
Expand Down Expand Up @@ -50,7 +49,7 @@ impl EncodingCompressor for RoaringBoolCompressor {
Ok(CompressedArray::compressed(
roaring_bool_encode(array.clone().into_bool()?)?.into_array(),
Some(CompressionTree::flat(self)),
Some(array.statistics()),
array,
))
}

Expand Down
2 changes: 1 addition & 1 deletion vortex-sampling-compressor/src/compressors/roaring_int.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ impl EncodingCompressor for RoaringIntCompressor {
Ok(CompressedArray::compressed(
roaring_int_encode(array.clone().into_primitive()?)?.into_array(),
Some(CompressionTree::flat(self)),
Some(array.statistics()),
array,
))
}

Expand Down
12 changes: 6 additions & 6 deletions vortex-sampling-compressor/src/compressors/runend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@ impl EncodingCompressor for RunEndCompressor {
ctx: SamplingCompressor<'a>,
) -> VortexResult<CompressedArray<'a>> {
let primitive_array = array.clone().into_primitive()?;

let (ends, values) = runend_encode(&primitive_array);
let compressed_ends = ctx.auxiliary("ends").compress(
&downscale_integer_array(ends.into_array())?,
like.as_ref().and_then(|l| l.child(0)),
)?;
let ends = downscale_integer_array(ends.into_array())?.into_primitive()?;

let compressed_ends = ctx
.auxiliary("ends")
.compress(&ends.into_array(), like.as_ref().and_then(|l| l.child(0)))?;
let compressed_values = ctx
.named("values")
.excluding(self)
Expand All @@ -73,7 +73,7 @@ impl EncodingCompressor for RunEndCompressor {
self,
vec![compressed_ends.path, compressed_values.path],
)),
Some(array.statistics()),
array,
))
}

Expand Down
3 changes: 1 addition & 2 deletions vortex-sampling-compressor/src/compressors/runend_bool.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::array::{BoolEncoding, PrimitiveArray};
use vortex_array::encoding::{Encoding, EncodingRef};
use vortex_array::stats::ArrayStatistics as _;
use vortex_array::{ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_error::VortexResult;
use vortex_runend_bool::compress::runend_bool_encode_slice;
Expand Down Expand Up @@ -50,7 +49,7 @@ impl EncodingCompressor for RunEndBoolCompressor {
RunEndBoolArray::try_new(compressed_ends.array, start, bool_array.validity())?
.into_array(),
Some(CompressionTree::new(self, vec![compressed_ends.path])),
Some(array.statistics()),
array,
))
}

Expand Down
Loading

0 comments on commit ff8cd91

Please sign in to comment.