Skip to content

Commit

Permalink
Revert "feat: patches uses a map in some cases" (#1629)
Browse files Browse the repository at this point in the history
Reverts #1626
  • Loading branch information
lwwmanning authored Dec 10, 2024
1 parent 0b93fe0 commit 3de4b29
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 236 deletions.
4 changes: 0 additions & 4 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,3 @@ harness = false
[[bench]]
name = "take_strings"
harness = false

[[bench]]
name = "take_patches"
harness = false
71 changes: 0 additions & 71 deletions vortex-array/benches/take_patches.rs

This file was deleted.

4 changes: 2 additions & 2 deletions vortex-array/benches/take_strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,5 @@ fn bench_varbinview(c: &mut Criterion) {
});
}

criterion_group!(bench_take_strings, bench_varbin, bench_varbinview);
criterion_main!(bench_take_strings);
criterion_group!(bench_take, bench_varbin, bench_varbinview);
criterion_main!(bench_take);
86 changes: 5 additions & 81 deletions vortex-array/src/patches.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
use std::fmt::Debug;
use std::hash::Hash;

use serde::{Deserialize, Serialize};
use vortex_dtype::Nullability::NonNullable;
use vortex_dtype::{
match_each_integer_ptype, match_each_unsigned_integer_ptype, DType, NativeIndexPType, PType,
};
use vortex_dtype::{match_each_integer_ptype, DType, PType};
use vortex_error::{vortex_bail, VortexExpect, VortexResult};
use vortex_scalar::Scalar;

use crate::aliases::hash_map::HashMap;
use crate::array::PrimitiveArray;
use crate::compute::{
scalar_at, search_sorted, search_sorted_usize, search_sorted_usize_many, slice,
Expand Down Expand Up @@ -224,33 +220,15 @@ impl Patches {
Ok(Some(Self::new(stop - start, indices, values)))
}

// https://docs.google.com/spreadsheets/d/1D9vBZ1QJ6mwcIvV5wIL0hjGgVchcEnAyhvitqWu2ugU
const PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN: f64 = 5.0;

fn is_map_faster_than_search(&self, take_indices: &ArrayData) -> bool {
(self.num_patches() as f64 / take_indices.len() as f64)
< Self::PREFER_MAP_WHEN_PATCHES_OVER_INDICES_LESS_THAN
}

/// Take the indices from the patches.
pub fn take(&self, take_indices: &ArrayData) -> VortexResult<Option<Self>> {
if self.is_map_faster_than_search(take_indices) {
self.take_map(take_indices)
} else {
self.take_search(take_indices)
}
}

pub fn take_search(&self, take_indices: &ArrayData) -> VortexResult<Option<Self>> {
let new_length = take_indices.len();

if take_indices.is_empty() {
pub fn take(&self, indices: &ArrayData) -> VortexResult<Option<Self>> {
if indices.is_empty() {
return Ok(None);
}

// TODO(ngates): plenty of optimisations to be made here
let take_indices =
try_cast(take_indices, &DType::Primitive(PType::U64, NonNullable))?.into_primitive()?;
try_cast(indices, &DType::Primitive(PType::U64, NonNullable))?.into_primitive()?;

let (values_indices, new_indices): (Vec<u64>, Vec<u64>) = search_sorted_usize_many(
self.indices(),
Expand Down Expand Up @@ -280,61 +258,7 @@ impl Patches {
PrimitiveArray::from_vec(values_indices, Validity::NonNullable).into_array();
let new_values = take(self.values(), values_indices)?;

Ok(Some(Self::new(new_length, new_indices, new_values)))
}

pub fn take_map(&self, take_indices: &ArrayData) -> VortexResult<Option<Self>> {
let take_indices = take_indices.clone().into_primitive()?;
match_each_unsigned_integer_ptype!(self.indices_ptype(), |$INDICES| {
let indices = self.indices
.clone()
.into_primitive()?;
let indices = indices
.maybe_null_slice::<$INDICES>();
match_each_unsigned_integer_ptype!(take_indices.ptype(), |$TAKE_INDICES| {
let take_indices = take_indices
.into_primitive()?;
let take_indices = take_indices
.maybe_null_slice::<$TAKE_INDICES>();
return self.take_map_helper(indices, take_indices);
});
});
}

fn take_map_helper<I: NativeIndexPType + Hash + Eq, TI: NativeIndexPType>(
&self,
indices: &[I],
take_indices: &[TI],
) -> VortexResult<Option<Self>> {
let new_length = take_indices.len();
let sparse_index_to_value_index: HashMap<I, usize> = indices
.iter()
.enumerate()
.map(|(value_index, sparse_index)| (*sparse_index, value_index))
.collect();
let min_index = self.min_index()?;
let max_index = self.max_index()?;
let (new_sparse_indices, value_indices): (Vec<u64>, Vec<u64>) = take_indices
.iter()
.enumerate()
.filter(|(_, ti)| ti.as_usize() < min_index || ti.as_usize() > max_index)
.filter_map(|(new_sparse_index, take_sparse_index)| {
sparse_index_to_value_index
.get(&I::usize_as(take_sparse_index.as_usize()))
// FIXME(DK): should we choose a small index width or should the compressor do that?
.map(|value_index| (new_sparse_index as u64, *value_index as u64))
})
.unzip();

if new_sparse_indices.is_empty() {
return Ok(None);
}

Ok(Some(Patches::new(
new_length,
ArrayData::from(new_sparse_indices),
take(self.values(), ArrayData::from(value_indices))?,
)))
Ok(Some(Self::new(indices.len(), new_indices, new_values)))
}
}

Expand Down
82 changes: 4 additions & 78 deletions vortex-dtype/src/ptype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,40 +83,6 @@ pub trait NativePType:
fn is_eq(self, other: Self) -> bool;
}

/// A trait for native Rust types that correspond 1:1 to a PType used for indexing.
///
/// Vortex assumes that every value used as an index is representable as both a usize and a u64.
pub trait NativeIndexPType: NativePType {
/// Convert this value to a usize.
///
/// # Safety
///
/// 1. Assumes that the value is a valid index into an array on this machine.
fn as_usize(self) -> usize;

/// Convert this value to a u64.
///
/// # Safety
///
/// 1. Assumes that the value is a valid index into an array on this machine.
/// 2. Assumes this machine's pointers are not larger than u64.
fn as_u64(self) -> u64;

/// Convert a usize to this type.
///
/// # Safety
///
/// 1. Assumes that the value is small enough to fit in this type.
fn usize_as(value: usize) -> Self;

/// Convert a u64 to this type.
///
/// # Safety
///
/// 1. Assumes that the value is small enough to fit in this type.
fn u64_as(value: u64) -> Self;
}

macro_rules! native_ptype {
($T:ty, $ptype:tt, $arrow:tt) => {
impl NativePType for $T {
Expand All @@ -138,46 +104,6 @@ macro_rules! native_ptype {
};
}

macro_rules! native_index_ptype {
($T:ty, $ptype:tt, $arrow:tt) => {
impl NativePType for $T {
const PTYPE: PType = PType::$ptype;
type ArrowPrimitiveType = $arrow;

fn is_nan(self) -> bool {
false
}

fn total_compare(self, other: Self) -> Ordering {
self.cmp(&other)
}

fn is_eq(self, other: Self) -> bool {
self == other
}
}

#[allow(clippy::cast_possible_truncation)]
impl NativeIndexPType for $T {
fn as_usize(self) -> usize {
self as usize
}

fn as_u64(self) -> u64 {
self as u64
}

fn usize_as(value: usize) -> Self {
value as Self
}

fn u64_as(value: u64) -> Self {
value as Self
}
}
};
}

macro_rules! native_float_ptype {
($T:ty, $ptype:tt, $arrow:tt) => {
impl NativePType for $T {
Expand All @@ -199,10 +125,10 @@ macro_rules! native_float_ptype {
};
}

native_index_ptype!(u8, U8, UInt8Type);
native_index_ptype!(u16, U16, UInt16Type);
native_index_ptype!(u32, U32, UInt32Type);
native_index_ptype!(u64, U64, UInt64Type);
native_ptype!(u8, U8, UInt8Type);
native_ptype!(u16, U16, UInt16Type);
native_ptype!(u32, U32, UInt32Type);
native_ptype!(u64, U64, UInt64Type);
native_ptype!(i8, I8, Int8Type);
native_ptype!(i16, I16, Int16Type);
native_ptype!(i32, I32, Int32Type);
Expand Down

0 comments on commit 3de4b29

Please sign in to comment.