Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
robert3005 committed Mar 20, 2024
1 parent 6703b54 commit fe8fbe4
Show file tree
Hide file tree
Showing 14 changed files with 188 additions and 10 deletions.
2 changes: 1 addition & 1 deletion pyvortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ fn dtype_bool(py: Python<'_>, nullable: bool) -> PyResult<Py<PyDType>> {
#[pyo3(signature = (width = None, signed = true, nullable = false))]
fn dtype_int(
py: Python<'_>,
width: Option<i8>,
width: Option<i16>,
signed: bool,
nullable: bool,
) -> PyResult<Py<PyDType>> {
Expand Down
2 changes: 2 additions & 0 deletions vortex-array/src/array/primitive/compute/as_arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ impl AsArrowArray for PrimitiveArray {
PType::U16 => Arc::new(as_arrow_array_primitive::<UInt16Type>(self)?),
PType::U32 => Arc::new(as_arrow_array_primitive::<UInt32Type>(self)?),
PType::U64 => Arc::new(as_arrow_array_primitive::<UInt64Type>(self)?),
PType::U128 => Arc::new(as_arrow_array_primitive::<Decimal128Type>(self)?),
PType::I8 => Arc::new(as_arrow_array_primitive::<Int8Type>(self)?),
PType::I16 => Arc::new(as_arrow_array_primitive::<Int16Type>(self)?),
PType::I32 => Arc::new(as_arrow_array_primitive::<Int32Type>(self)?),
PType::I64 => Arc::new(as_arrow_array_primitive::<Int64Type>(self)?),
PType::I128 => Arc::new(as_arrow_array_primitive::<Decimal128Type>(self)?),
PType::F16 => Arc::new(as_arrow_array_primitive::<Float16Type>(self)?),
PType::F32 => Arc::new(as_arrow_array_primitive::<Float32Type>(self)?),
PType::F64 => Arc::new(as_arrow_array_primitive::<Float64Type>(self)?),
Expand Down
4 changes: 4 additions & 0 deletions vortex-array/src/array/primitive/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,12 @@ impl<T: NativePType + Into<PScalar>> BitWidth for T {
PScalar::U16(i) => bit_width - i.leading_zeros() as usize,
PScalar::U32(i) => bit_width - i.leading_zeros() as usize,
PScalar::U64(i) => bit_width - i.leading_zeros() as usize,
PScalar::U128(i) => bit_width - i.leading_zeros() as usize,
PScalar::I8(i) => bit_width - i.leading_zeros() as usize,
PScalar::I16(i) => bit_width - i.leading_zeros() as usize,
PScalar::I32(i) => bit_width - i.leading_zeros() as usize,
PScalar::I64(i) => bit_width - i.leading_zeros() as usize,
PScalar::I128(i) => bit_width - i.leading_zeros() as usize,
PScalar::F16(_) => bit_width,
PScalar::F32(_) => bit_width,
PScalar::F64(_) => bit_width,
Expand All @@ -115,10 +117,12 @@ impl<T: NativePType + Into<PScalar>> BitWidth for T {
PScalar::U16(i) => i.trailing_zeros() as usize,
PScalar::U32(i) => i.trailing_zeros() as usize,
PScalar::U64(i) => i.trailing_zeros() as usize,
PScalar::U128(i) => i.trailing_zeros() as usize,
PScalar::I8(i) => i.trailing_zeros() as usize,
PScalar::I16(i) => i.trailing_zeros() as usize,
PScalar::I32(i) => i.trailing_zeros() as usize,
PScalar::I64(i) => i.trailing_zeros() as usize,
PScalar::I128(i) => i.trailing_zeros() as usize,
PScalar::F16(_) => 0,
PScalar::F32(_) => 0,
PScalar::F64(_) => 0,
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/array/varbin/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ impl AsArrowArray for VarBinArray {
&PType::U64 => flatten_primitive(cast(offsets.as_ref(), &PType::I64.into())?.as_ref())?,
_ => flatten_primitive(cast(offsets.as_ref(), &PType::I32.into())?.as_ref())?,
};
let nulls = as_nulls(offsets.validity())?;
let nulls = as_nulls(self.validity())?;

let data = flatten_primitive(self.bytes())?;
assert_eq!(data.ptype(), &PType::U8);
Expand Down
80 changes: 79 additions & 1 deletion vortex-array/src/array/varbinview/compute.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,30 @@
use std::sync::Arc;

use arrow_array::{ArrayRef as ArrowArrayRef, BinaryViewArray, StringViewArray};
use itertools::Itertools;

use vortex_schema::DType;

use crate::array::varbinview::VarBinViewArray;
use crate::array::Array;
use crate::arrow::wrappers::{as_nulls, as_scalar_buffer};
use crate::compute::as_arrow::AsArrowArray;
use crate::compute::flatten::{flatten, flatten_primitive, FlattenFn, FlattenedArray};
use crate::compute::scalar_at::ScalarAtFn;
use crate::compute::ArrayCompute;
use crate::error::VortexResult;
use crate::ptype::PType;
use crate::scalar::Scalar;
use vortex_schema::DType;

impl ArrayCompute for VarBinViewArray {
fn as_arrow(&self) -> Option<&dyn AsArrowArray> {
Some(self)
}

fn flatten(&self) -> Option<&dyn FlattenFn> {
Some(self)
}

fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
Some(self)
}
Expand All @@ -27,3 +45,63 @@ impl ScalarAtFn for VarBinViewArray {
}
}
}

impl FlattenFn for VarBinViewArray {
fn flatten(&self) -> VortexResult<FlattenedArray> {
let views = flatten(self.views())?.into_array();
let data = self
.data()
.iter()
.map(|d| flatten(d.as_ref()).unwrap().into_array())
.collect::<Vec<_>>();
let validity = self
.validity()
.map(|v| flatten(v).map(FlattenedArray::into_array))
.transpose()?;
Ok(FlattenedArray::VarBinView(VarBinViewArray::new(
views,
data,
self.dtype.clone(),
validity,
)))
}
}

impl AsArrowArray for VarBinViewArray {
fn as_arrow(&self) -> VortexResult<ArrowArrayRef> {
// Ensure the offsets are either i32 or i64
let views = flatten_primitive(self.views())?;
assert_eq!(views.ptype(), &PType::U128);
let nulls = as_nulls(self.validity())?;

let data = self
.data()
.iter()
.map(|d| flatten_primitive(d.as_ref()).unwrap())
.collect::<Vec<_>>();
if !data.is_empty() {
assert_eq!(data[0].ptype(), &PType::U8);
assert!(data.iter().map(|d| d.ptype()).all_equal());
}

let data = data
.iter()
.map(|p| p.buffer().to_owned())
.collect::<Vec<_>>();

// Switch on Arrow DType.
Ok(match self.dtype() {
DType::Binary(_) => Arc::new(BinaryViewArray::new(
as_scalar_buffer::<u128>(views),
data,
nulls,
)),
DType::Utf8(_) => Arc::new(StringViewArray::new(
as_scalar_buffer::<u128>(views),
data,
nulls,
)),
_ => return Err(VortexError::InvalidDType(self.dtype().clone())),
})
}
}
3 changes: 3 additions & 0 deletions vortex-array/src/compute/flatten.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::array::composite::CompositeArray;
use crate::array::primitive::PrimitiveArray;
use crate::array::struct_::StructArray;
use crate::array::varbin::VarBinArray;
use crate::array::varbinview::VarBinViewArray;
use crate::array::{Array, ArrayRef};
use crate::error::{VortexError, VortexResult};

Expand All @@ -19,6 +20,7 @@ pub enum FlattenedArray {
Primitive(PrimitiveArray),
Struct(StructArray),
VarBin(VarBinArray),
VarBinView(VarBinViewArray),
}

impl FlattenedArray {
Expand All @@ -30,6 +32,7 @@ impl FlattenedArray {
FlattenedArray::Primitive(array) => array.boxed(),
FlattenedArray::Struct(array) => array.boxed(),
FlattenedArray::VarBin(array) => array.boxed(),
FlattenedArray::VarBinView(array) => array.boxed(),
}
}
}
Expand Down
48 changes: 43 additions & 5 deletions vortex-array/src/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,18 @@ use arrow_array::array::{
use arrow_array::array::{ArrowPrimitiveType, OffsetSizeTrait};
use arrow_array::cast::{as_null_array, AsArray};
use arrow_array::types::{
ByteArrayType, Date32Type, Date64Type, DurationMicrosecondType, DurationMillisecondType,
DurationNanosecondType, DurationSecondType, Time32MillisecondType, Time32SecondType,
Time64MicrosecondType, Time64NanosecondType, TimestampMicrosecondType,
ByteArrayType, ByteViewType, Date32Type, Date64Type, DurationMicrosecondType,
DurationMillisecondType, DurationNanosecondType, DurationSecondType, Time32MillisecondType,
Time32SecondType, Time64MicrosecondType, Time64NanosecondType, TimestampMicrosecondType,
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
};
use arrow_array::types::{
Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
UInt32Type, UInt64Type, UInt8Type,
};
use arrow_array::{BinaryViewArray, GenericByteViewArray, StringViewArray};
use arrow_buffer::buffer::{NullBuffer, OffsetBuffer};
use arrow_buffer::Buffer;
use arrow_buffer::{ArrowNativeType, Buffer, ScalarBuffer};
use arrow_schema::{DataType, TimeUnit};

use vortex_schema::DType;
Expand All @@ -28,9 +29,10 @@ use crate::array::constant::ConstantArray;
use crate::array::primitive::PrimitiveArray;
use crate::array::struct_::StructArray;
use crate::array::varbin::VarBinArray;
use crate::array::varbinview::VarBinViewArray;
use crate::array::{Array, ArrayRef};
use crate::datetime::{LocalDateTime, LocalDateTimeArray};
use crate::ptype::PType;
use crate::ptype::{NativePType, PType};
use crate::scalar::NullScalar;

pub trait FromArrowArray<A> {
Expand All @@ -49,6 +51,12 @@ impl From<&NullBuffer> for ArrayRef {
}
}

impl<T: ArrowNativeType + NativePType> From<&ScalarBuffer<T>> for ArrayRef {
fn from(value: &ScalarBuffer<T>) -> Self {
PrimitiveArray::new(T::PTYPE, value.inner().to_owned(), None).boxed()
}
}

impl<O: OffsetSizeTrait> From<&OffsetBuffer<O>> for ArrayRef {
fn from(value: &OffsetBuffer<O>) -> Self {
let ptype = if O::IS_LARGE { PType::I64 } else { PType::I32 };
Expand Down Expand Up @@ -107,6 +115,28 @@ impl<T: ByteArrayType> FromArrowArray<&GenericByteArray<T>> for ArrayRef {
}
}

impl<T: ByteViewType> FromArrowArray<&GenericByteViewArray<T>> for ArrayRef {
fn from_arrow(value: &GenericByteViewArray<T>, nullable: bool) -> Self {
let dtype = match T::DATA_TYPE {
DataType::BinaryView => DType::Binary(nullable.into()),
DataType::Utf8View => DType::Utf8(nullable.into()),
_ => panic!("Invalid data type for ByteViewArray"),
};

VarBinViewArray::new(
value.views().into(),
value
.data_buffers()
.iter()
.map(|b| b.into())
.collect::<Vec<_>>(),
dtype,
nulls(value.nulls(), nullable, value.len()),
)
.boxed()
}
}

impl FromArrowArray<&ArrowBooleanArray> for ArrayRef {
fn from_arrow(value: &ArrowBooleanArray, nullable: bool) -> Self {
BoolArray::new(
Expand Down Expand Up @@ -184,6 +214,14 @@ impl FromArrowArray<ArrowArrayRef> for ArrayRef {
DataType::LargeUtf8 => ArrayRef::from_arrow(array.as_string::<i64>(), nullable),
DataType::Binary => ArrayRef::from_arrow(array.as_binary::<i32>(), nullable),
DataType::LargeBinary => ArrayRef::from_arrow(array.as_binary::<i64>(), nullable),
DataType::BinaryView => ArrayRef::from_arrow(
array.as_any().downcast_ref::<BinaryViewArray>().unwrap(),
nullable,
),
DataType::Utf8View => ArrayRef::from_arrow(
array.as_any().downcast_ref::<StringViewArray>().unwrap(),
nullable,
),
DataType::Struct(_) => ArrayRef::from_arrow(array.as_struct(), nullable),
DataType::Null => ArrayRef::from_arrow(as_null_array(array.as_ref()), nullable),
DataType::Timestamp(u, _) => match u {
Expand Down
15 changes: 15 additions & 0 deletions vortex-array/src/ptype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ pub enum PType {
U16,
U32,
U64,
U128,
I8,
I16,
I32,
I64,
I128,
F16,
F32,
F64,
Expand Down Expand Up @@ -58,10 +60,12 @@ native_ptype!(u8, U8);
native_ptype!(u16, U16);
native_ptype!(u32, U32);
native_ptype!(u64, U64);
native_ptype!(u128, U128);
native_ptype!(i8, I8);
native_ptype!(i16, I16);
native_ptype!(i32, I32);
native_ptype!(i64, I64);
native_ptype!(i128, I128);
native_ptype!(f16, F16);
native_ptype!(f32, F32);
native_ptype!(f64, F64);
Expand All @@ -77,10 +81,12 @@ macro_rules! match_each_native_ptype {
PType::I16 => __with__! { i16 },
PType::I32 => __with__! { i32 },
PType::I64 => __with__! { i64 },
PType::I128 => __with__! { i128},
PType::U8 => __with__! { u8 },
PType::U16 => __with__! { u16 },
PType::U32 => __with__! { u32 },
PType::U64 => __with__! { u64 },
PType::U128 => __with__! { u128},
PType::F16 => __with__! { f16 },
PType::F32 => __with__! { f32 },
PType::F64 => __with__! { f64 },
Expand All @@ -99,10 +105,12 @@ macro_rules! match_each_integer_ptype {
PType::I16 => __with__! { i16 },
PType::I32 => __with__! { i32 },
PType::I64 => __with__! { i64 },
PType::I128 => __with__! { i128 },
PType::U8 => __with__! { u8 },
PType::U16 => __with__! { u16 },
PType::U32 => __with__! { u32 },
PType::U64 => __with__! { u64 },
PType::U128 => __with__! { u128},
_ => panic!("Unsupported ptype {:?}", $self),
}
})
Expand Down Expand Up @@ -142,10 +150,12 @@ impl Display for PType {
PType::U16 => write!(f, "u16"),
PType::U32 => write!(f, "u32"),
PType::U64 => write!(f, "u64"),
PType::U128 => write!(f, "u128"),
PType::I8 => write!(f, "i8"),
PType::I16 => write!(f, "i16"),
PType::I32 => write!(f, "i32"),
PType::I64 => write!(f, "i64"),
PType::I128 => write!(f, "i128"),
PType::F16 => write!(f, "f16"),
PType::F32 => write!(f, "f32"),
PType::F64 => write!(f, "f64"),
Expand Down Expand Up @@ -185,6 +195,11 @@ impl TryFrom<&DType> for PType {
Unsigned => Ok(PType::U64),
Signed => Ok(PType::I64),
},
IntWidth::_128 => match s {
Unknown => Ok(PType::I128),
Unsigned => Ok(PType::U128),
Signed => Ok(PType::I128),
},
},
DType::Float(f, _) => match f {
FloatWidth::Unknown => Ok(PType::F64),
Expand Down
4 changes: 4 additions & 0 deletions vortex-array/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,17 @@ impl Scalar {
(IntWidth::_64, Signedness::Unknown | Signedness::Signed) => {
PrimitiveScalar::none(PType::I64).into()
}
(IntWidth::_128, Signedness::Unknown | Signedness::Signed) => {
PrimitiveScalar::none(PType::I128).into()
}
(IntWidth::Unknown, Signedness::Unsigned) => {
PrimitiveScalar::none(PType::U64).into()
}
(IntWidth::_8, Signedness::Unsigned) => PrimitiveScalar::none(PType::U8).into(),
(IntWidth::_16, Signedness::Unsigned) => PrimitiveScalar::none(PType::U16).into(),
(IntWidth::_32, Signedness::Unsigned) => PrimitiveScalar::none(PType::U32).into(),
(IntWidth::_64, Signedness::Unsigned) => PrimitiveScalar::none(PType::U64).into(),
(IntWidth::_128, Signedness::Unsigned) => PrimitiveScalar::none(PType::U128).into(),
},
DType::Decimal(_, _, _) => unimplemented!("DecimalScalar"),
DType::Float(w, _) => match w {
Expand Down
Loading

0 comments on commit fe8fbe4

Please sign in to comment.