Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[blocked] Switch to Utf8View for TPC-H #476

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fixes
  • Loading branch information
a10y committed Aug 1, 2024
commit 1898614ccc82782e80d3f9d4991809ee03d9fe04
21 changes: 19 additions & 2 deletions vortex-array/src/array/varbinview/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ use vortex_scalar::Scalar;

use crate::array::varbin::varbin_scalar;
use crate::array::varbinview::{VarBinViewArray, VIEW_SIZE};
use crate::arrow::FromArrowArray;
use crate::compute::unary::ScalarAtFn;
use crate::compute::{slice, ArrayCompute, SliceFn};
use crate::compute::{slice, ArrayCompute, SliceFn, TakeFn};
use crate::validity::ArrayValidity;
use crate::{Array, ArrayDType, IntoArray};
use crate::{Array, ArrayDType, ArrayData, IntoArray, IntoCanonical};

impl ArrayCompute for VarBinViewArray {
fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
Expand All @@ -17,6 +18,10 @@ impl ArrayCompute for VarBinViewArray {
fn slice(&self) -> Option<&dyn SliceFn> {
Some(self)
}

fn take(&self) -> Option<&dyn TakeFn> {
Some(self)
}
}

impl ScalarAtFn for VarBinViewArray {
Expand All @@ -43,3 +48,15 @@ impl SliceFn for VarBinViewArray {
.into_array())
}
}

impl TakeFn for VarBinViewArray {
    /// Selects the elements of this array at the positions given by `indices`.
    ///
    /// Both this array and `indices` are canonicalized and converted to Arrow, the
    /// selection is delegated to `arrow_select::take::take`, and the Arrow result is
    /// converted back into a Vortex array.
    ///
    /// # Errors
    ///
    /// Returns an error if canonicalizing either input fails or if the Arrow `take`
    /// kernel rejects the inputs.
    fn take(&self, indices: &Array) -> VortexResult<Array> {
        let array_arrow = self.clone().into_canonical()?.into_arrow();
        let indices_arrow = indices.clone().into_canonical()?.into_arrow();

        let take_arrow = arrow_select::take::take(&array_arrow, &indices_arrow, None)?;

        // Arrow's `is_nullable` is data-dependent: it reports whether the taken values
        // may actually contain nulls. Relying on it alone would turn a nullable array
        // into a non-nullable one whenever the selected rows happen to be all valid,
        // changing the dtype across `take`. Keep the declared nullability of the
        // source, and still mark the result nullable if the taken data contains nulls.
        let nullable = self.dtype().is_nullable() || take_arrow.is_nullable();

        Ok(ArrayData::from_arrow(take_arrow, nullable).into_array())
    }
}
4 changes: 2 additions & 2 deletions vortex-array/src/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ use crate::{Array, IntoArray};
/// decompress it later to pass to a compute kernel, there are multiple suitable Arrow array
/// variants to hold the data.
///
/// To disambiguate, we choose a canonical physical encoding for every Vortex [`DType`], which
/// will correspond to an arrow-rs [`arrow_schema::DataType`].
/// To disambiguate, we choose a canonical physical encoding for every Vortex
/// [`vortex_dtype::DType`], which will correspond to an arrow-rs [`arrow_schema::DataType`].
///
/// # Views support
///
Expand Down
8 changes: 4 additions & 4 deletions vortex-datafusion/src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,12 @@ mod test {

assert_eq!(
infer_data_type(&DType::Utf8(Nullability::NonNullable)),
DataType::Utf8
DataType::Utf8View
);

assert_eq!(
infer_data_type(&DType::Binary(Nullability::NonNullable)),
DataType::Binary
DataType::BinaryView
);

assert_eq!(
Expand All @@ -184,7 +184,7 @@ mod test {
)),
DataType::Struct(Fields::from(vec![
FieldRef::from(Field::new("field_a", DataType::Boolean, false)),
FieldRef::from(Field::new("field_b", DataType::Utf8, true)),
FieldRef::from(Field::new("field_b", DataType::Utf8View, true)),
]))
);
}
Expand All @@ -207,7 +207,7 @@ mod test {
infer_schema(&schema_nonnull),
Schema::new(Fields::from(vec![
Field::new("field_a", DataType::Boolean, false),
Field::new("field_b", DataType::Utf8, false),
Field::new("field_b", DataType::Utf8View, false),
Field::new("field_c", DataType::Int32, true),
]))
);
Expand Down
22 changes: 9 additions & 13 deletions vortex-datafusion/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -478,25 +478,21 @@ mod test {
use datafusion_expr::{and, col, lit, BinaryExpr, Expr, Operator};
use vortex::array::primitive::PrimitiveArray;
use vortex::array::struct_::StructArray;
use vortex::array::varbin::VarBinArray;
use vortex::array::varbinview::VarBinViewArray;
use vortex::validity::Validity;
use vortex::{Array, IntoArray};
use vortex_dtype::{DType, Nullability};

use crate::{can_be_pushed_down, SessionContextExt, VortexMemTableOptions};

fn presidents_array() -> Array {
let names = VarBinArray::from_vec(
vec![
"Washington",
"Adams",
"Jefferson",
"Madison",
"Monroe",
"Adams",
],
DType::Utf8(Nullability::NonNullable),
);
let names = VarBinViewArray::from_iter_str(vec![
"Washington",
"Adams",
"Jefferson",
"Madison",
"Monroe",
"Adams",
]);
let term_start = PrimitiveArray::from_vec(
vec![1789u16, 1797, 1801, 1809, 1817, 1825],
Validity::NonNullable,
Expand Down
Loading