Skip to content

Commit

Permalink
fix: use search_sorted_usize when searching for indices (#1566)
Browse files Browse the repository at this point in the history
Originally found in a CI failure:
https://github.com/spiraldb/vortex/actions/runs/12170181872/job/33944625544?pr=1528

Brief description:

Let's imagine you have the following SparseArray

```
SparseArray(len=1000, indices=[2], values=[2])
```

If you `slice(array, 2, 1000)` then you end up where the end of the
slice exceeds the `ptype` of the indices array (after the changes for
`downcast_integer_array` went in earlier) and this would cause a
failure.

The fix is to use search_sorted_usize so that rather than casting the
scalar to the ptype of the array, we cast the array values up to `usize`
for comparison

Added a minimal test and verified it fails before the change:


![image](https://github.com/user-attachments/assets/0131b0e1-9f07-4e88-8774-1d4fbf111c1a)
  • Loading branch information
a10y authored Dec 5, 2024
1 parent cc055d4 commit 5ccf028
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 17 deletions.
4 changes: 2 additions & 2 deletions encodings/runend-bool/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::fmt::{Debug, Display};

use serde::{Deserialize, Serialize};
use vortex_array::array::{BoolArray, PrimitiveArray};
use vortex_array::compute::{scalar_at, search_sorted, SearchSortedSide};
use vortex_array::compute::{scalar_at, search_sorted_usize, SearchSortedSide};
use vortex_array::encoding::ids;
use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet};
use vortex_array::validity::{LogicalValidity, Validity, ValidityMetadata, ValidityVTable};
Expand Down Expand Up @@ -112,7 +112,7 @@ impl RunEndBoolArray {
}

pub(crate) fn find_physical_index(&self, index: usize) -> VortexResult<usize> {
search_sorted(&self.ends(), index + self.offset(), SearchSortedSide::Right)
search_sorted_usize(&self.ends(), index + self.offset(), SearchSortedSide::Right)
.map(|s| s.to_ends_index(self.ends().len()))
}

Expand Down
6 changes: 4 additions & 2 deletions encodings/runend/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use std::fmt::{Debug, Display};

use serde::{Deserialize, Serialize};
use vortex_array::array::PrimitiveArray;
use vortex_array::compute::{scalar_at, search_sorted, search_sorted_usize_many, SearchSortedSide};
use vortex_array::compute::{
scalar_at, search_sorted_usize, search_sorted_usize_many, SearchSortedSide,
};
use vortex_array::encoding::ids;
use vortex_array::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet};
use vortex_array::validity::{
Expand Down Expand Up @@ -115,7 +117,7 @@ impl RunEndArray {

/// Convert the given logical index to an index into the `values` array
pub fn find_physical_index(&self, index: usize) -> VortexResult<usize> {
search_sorted(&self.ends(), index + self.offset(), SearchSortedSide::Right)
search_sorted_usize(&self.ends(), index + self.offset(), SearchSortedSide::Right)
.map(|s| s.to_ends_index(self.ends().len()))
}

Expand Down
7 changes: 4 additions & 3 deletions vortex-array/src/array/chunked/compute/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ use vortex_scalar::Scalar;
use crate::array::chunked::ChunkedArray;
use crate::array::ChunkedEncoding;
use crate::compute::{
scalar_at, search_sorted, slice, subtract_scalar, take, try_cast, SearchSortedSide, TakeFn,
TakeOptions,
scalar_at, search_sorted_usize, slice, subtract_scalar, take, try_cast, SearchSortedSide,
TakeFn, TakeOptions,
};
use crate::stats::ArrayStatistics;
use crate::{ArrayDType, ArrayData, ArrayLen, IntoArrayData, IntoArrayVariant, ToArrayData};
Expand Down Expand Up @@ -90,7 +90,8 @@ fn take_strict_sorted(
// Find the end of this chunk, and locate that position in the indices array.
let chunk_begin = usize::try_from(&scalar_at(chunked.chunk_offsets(), chunk_idx)?)?;
let chunk_end = usize::try_from(&scalar_at(chunked.chunk_offsets(), chunk_idx + 1)?)?;
let chunk_end_pos = search_sorted(indices, chunk_end, SearchSortedSide::Left)?.to_index();
let chunk_end_pos =
search_sorted_usize(indices, chunk_end, SearchSortedSide::Left)?.to_index();

// Now we can say the slice of indices belonging to this chunk is [pos, chunk_end_pos)
let chunk_indices = slice(indices, pos, chunk_end_pos)?;
Expand Down
11 changes: 6 additions & 5 deletions vortex-array/src/array/chunked/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use vortex_scalar::Scalar;

use crate::array::primitive::PrimitiveArray;
use crate::compute::{
scalar_at, search_sorted, subtract_scalar, SearchSortedSide, SubtractScalarFn,
scalar_at, search_sorted_usize, subtract_scalar, SearchSortedSide, SubtractScalarFn,
};
use crate::encoding::ids;
use crate::iter::{ArrayIterator, ArrayIteratorAdapter};
Expand Down Expand Up @@ -120,10 +120,11 @@ impl ChunkedArray {

// Since there might be duplicate values in offsets because of empty chunks we want to search from right
// and take the last chunk (we subtract 1 since there's a leading 0)
let index_chunk = search_sorted(&self.chunk_offsets(), index, SearchSortedSide::Right)
.vortex_expect("Search sorted failed in find_chunk_idx")
.to_ends_index(self.nchunks() + 1)
.saturating_sub(1);
let index_chunk =
search_sorted_usize(&self.chunk_offsets(), index, SearchSortedSide::Right)
.vortex_expect("Search sorted failed in find_chunk_idx")
.to_ends_index(self.nchunks() + 1)
.saturating_sub(1);
let chunk_start = scalar_at(self.chunk_offsets(), index_chunk)
.and_then(|s| usize::try_from(&s))
.vortex_expect("Failed to find chunk start in find_chunk_idx");
Expand Down
18 changes: 18 additions & 0 deletions vortex-array/src/array/sparse/compute/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,22 @@ mod tests {

assert_eq!(primitive_doubly_sliced.maybe_null_slice::<u32>(), &[13531]);
}

#[test]
fn slice_partially_invalid() {
let values = vec![0u64].into_array();
let indices = vec![0u8].into_array();

let sparse = SparseArray::try_new(indices, values, 1000, 999u64.into()).unwrap();
let sliced = slice(&sparse, 0, 1000).unwrap();
let mut expected = vec![999u64; 1000];
expected[0] = 0;

let actual = sliced
.into_primitive()
.unwrap()
.maybe_null_slice::<u64>()
.to_vec();
assert_eq!(expected, actual);
}
}
4 changes: 2 additions & 2 deletions vortex-array/src/array/sparse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use vortex_error::{vortex_bail, vortex_panic, VortexExpect as _, VortexResult};
use vortex_scalar::{Scalar, ScalarValue};

use crate::array::constant::ConstantArray;
use crate::compute::{scalar_at, search_sorted, SearchResult, SearchSortedSide};
use crate::compute::{scalar_at, search_sorted_usize, SearchResult, SearchSortedSide};
use crate::encoding::ids;
use crate::stats::{ArrayStatistics, Stat, StatisticsVTable, StatsSet};
use crate::validity::{ArrayValidity, LogicalValidity, ValidityVTable};
Expand Down Expand Up @@ -124,7 +124,7 @@ impl SparseArray {

/// Returns the position or the insertion point of a given index in the indices array.
fn search_index(&self, index: usize) -> VortexResult<SearchResult> {
search_sorted(
search_sorted_usize(
&self.indices(),
self.indices_offset() + index,
SearchSortedSide::Left,
Expand Down
9 changes: 6 additions & 3 deletions vortex-array/src/stream/take_rows.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ use vortex_dtype::match_each_integer_ptype;
use vortex_error::{vortex_bail, VortexResult};
use vortex_scalar::Scalar;

use crate::compute::{search_sorted, slice, subtract_scalar, take, SearchSortedSide, TakeOptions};
use crate::compute::{
search_sorted_usize, slice, subtract_scalar, take, SearchSortedSide, TakeOptions,
};
use crate::stats::{ArrayStatistics, Stat};
use crate::stream::ArrayStream;
use crate::variants::PrimitiveArrayTrait;
Expand Down Expand Up @@ -72,8 +74,9 @@ impl<R: ArrayStream> Stream for TakeRows<R> {

while let Some(batch) = ready!(this.reader.as_mut().poll_next(cx)?) {
let curr_offset = *this.row_offset;
let left = search_sorted(this.indices, curr_offset, SearchSortedSide::Left)?.to_index();
let right = search_sorted(
let left =
search_sorted_usize(this.indices, curr_offset, SearchSortedSide::Left)?.to_index();
let right = search_sorted_usize(
this.indices,
curr_offset + batch.len(),
SearchSortedSide::Left,
Expand Down

0 comments on commit 5ccf028

Please sign in to comment.