Skip to content

Commit

Permalink
Array Data + View (#210)
Browse files Browse the repository at this point in the history
Prototype of refactoring into ArrayData + ArrayView.
  • Loading branch information
gatesn authored Apr 6, 2024
1 parent 1bec4c2 commit 025e5c7
Show file tree
Hide file tree
Showing 17 changed files with 1,190 additions and 0 deletions.
14 changes: 14 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ members = [
"vortex-alloc",
"vortex-alp",
"vortex-array",
"vortex-array2",
"vortex-datetime",
"vortex-dict",
"vortex-error",
Expand Down
24 changes: 24 additions & 0 deletions vortex-array2/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
[package]
name = "vortex-array2"
version.workspace = true
homepage.workspace = true
repository.workspace = true
authors.workspace = true
license.workspace = true
keywords.workspace = true
include.workspace = true
edition.workspace = true
rust-version.workspace = true

[dependencies]
arrow-buffer = { workspace = true }
flatbuffers = { workspace = true }
half = { workspace = true }
paste = { workspace = true }
vortex-array = { path = "../vortex-array" }
vortex-error = { path = "../vortex-error" }
vortex-flatbuffers = { path = "../vortex-flatbuffers" }
vortex-schema = { path = "../vortex-schema" }

[lints]
workspace = true
55 changes: 55 additions & 0 deletions vortex-array2/src/compute.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use vortex::scalar::Scalar;
use vortex_error::{vortex_err, VortexResult};

use crate::primitive::PrimitiveData;
use crate::{Array, WithArray};

/// Lookup table of optional compute kernels for an array.
///
/// Every accessor has a default body returning `None`, meaning "this operation is not
/// available". The free functions `scalar_at` and `flatten` in this module turn a `None`
/// into a "Not implemented" error.
pub trait ArrayCompute {
/// Returns the kernel used to read a single scalar, or `None` (the default).
fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
None
}
/// Returns the kernel used to flatten the array, or `None` (the default).
fn flatten(&self) -> Option<&dyn FlattenFn> {
None
}
}

/// Kernel that produces the scalar value at a given position.
pub trait ScalarAtFn {
/// Returns the scalar at `index`, or an error if it cannot be produced.
// NOTE(review): out-of-bounds behaviour is up to each implementation — confirm.
fn scalar_at(&self, index: usize) -> VortexResult<Scalar>;
}

/// Reads the scalar at `index` from `array`.
///
/// # Errors
///
/// Returns a "Not implemented: scalar_at" error when the array exposes no
/// [`ScalarAtFn`] kernel; otherwise returns whatever the kernel returns.
pub fn scalar_at(array: &Array, index: usize) -> VortexResult<Scalar> {
    array.with_array(|inner| match inner.scalar_at() {
        Some(kernel) => kernel.scalar_at(index),
        None => Err(vortex_err!("Not implemented: scalar_at")),
    })
}

/// Kernel that converts an array into one of the [`FlattenedArray`] forms.
pub trait FlattenFn {
/// Flattens the array, or returns an error if that is not possible.
fn flatten(&self) -> VortexResult<FlattenedArray>;
}

/// The result of flattening an array.
pub enum FlattenedArray {
/// The array flattened to primitive data.
Primitive(PrimitiveData),
// Just to introduce a second variant for now
/// Placeholder variant; carries an arbitrary string.
Other(String),
}

/// Flattens `array` using its [`FlattenFn`] kernel.
///
/// # Errors
///
/// Returns a "Not implemented: flatten" error when the array exposes no
/// [`FlattenFn`] kernel; otherwise returns whatever the kernel returns.
pub fn flatten(array: &Array) -> VortexResult<FlattenedArray> {
    array.with_array(|inner| match inner.flatten() {
        Some(kernel) => kernel.flatten(),
        None => Err(vortex_err!("Not implemented: flatten")),
    })
}

/// Flattens `array` and requires the result to be primitive.
///
/// # Errors
///
/// Propagates any error from [`flatten`], and returns an error when the flattened
/// result is not [`FlattenedArray::Primitive`].
pub fn flatten_primitive(array: &Array) -> VortexResult<PrimitiveData> {
    match flatten(array)? {
        FlattenedArray::Primitive(primitive) => Ok(primitive),
        // Listed explicitly so a newly added variant forces this match to be revisited.
        FlattenedArray::Other(_) => Err(vortex_err!(
            "Cannot flatten array {:?} into primitive",
            array
        )),
    }
}
39 changes: 39 additions & 0 deletions vortex-array2/src/context.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
use std::sync::Arc;

use vortex::encoding::EncodingId;

use crate::encoding::EncodingRef;

/// An ordered, shared list of encodings.
///
/// An encoding's position in the list is the `u16` value accepted by
/// `find_encoding` and returned by `encoding_idx`.
#[derive(Debug)]
pub struct SerdeContext {
// Position in this slice is the encoding's index within the context.
encodings: Arc<[EncodingRef]>,
}

impl SerdeContext {
    /// Creates a context from an ordered list of encodings.
    pub fn new(encodings: Arc<[EncodingRef]>) -> Self {
        Self { encodings }
    }

    /// Returns the encodings of this context in index order.
    pub fn encodings(&self) -> &[EncodingRef] {
        &self.encodings
    }

    /// Returns the encoding stored at position `encoding_id`, or `None` if the
    /// position is out of range.
    ///
    /// Despite its name, `encoding_id` is the positional index within this context
    /// (the value produced by [`Self::encoding_idx`]), not an [`EncodingId`].
    pub fn find_encoding(&self, encoding_id: u16) -> Option<EncodingRef> {
        self.encodings.get(usize::from(encoding_id)).copied()
    }

    /// Returns the position of the first encoding whose id equals `encoding_id`.
    ///
    /// Returns `None` when no encoding matches, or when the matching position does not
    /// fit in a `u16`. A plain `as u16` cast would silently wrap in that case and hand
    /// back the index of a different encoding.
    pub fn encoding_idx(&self, encoding_id: EncodingId) -> Option<u16> {
        self.encodings
            .iter()
            .position(|e| e.id() == encoding_id)
            .and_then(|idx| u16::try_from(idx).ok())
    }
}

impl Default for SerdeContext {
fn default() -> Self {
Self {
encodings: vec![].into(), // ENCODINGS.iter().cloned().collect_vec().into(),
}
}
}
148 changes: 148 additions & 0 deletions vortex-array2/src/data.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
use std::marker::PhantomData;
use std::sync::Arc;

use arrow_buffer::Buffer;
use vortex_error::{vortex_bail, VortexError, VortexResult};
use vortex_schema::DType;

use crate::encoding::EncodingRef;
use crate::{Array, ArrayDef, ArrayMetadata, IntoArray, ToArray};

/// Owned representation of an array: an encoding plus the dtype, metadata, buffers and
/// child arrays it is built from.
///
/// Buffers and children sit behind `Arc`s, so `clone()` on those fields is a
/// reference-count bump.
// NOTE(review): `dead_code` is allowed presumably because this is a prototype and not
// every field is read yet — confirm and remove once they are.
#[allow(dead_code)]
#[derive(Clone, Debug)]
pub struct ArrayData {
// The encoding this data is validated against in `try_new`.
encoding: EncodingRef,
dtype: DType,
// Type-erased metadata; `TypedArrayData` downcasts it to the concrete type.
metadata: Arc<dyn ArrayMetadata>,
buffers: Arc<[Buffer]>,
children: Arc<[ArrayData]>,
}

impl ArrayData {
    /// Assembles an `ArrayData` from its parts and validates it against `encoding`.
    ///
    /// # Errors
    ///
    /// Returns the error reported by `encoding.with_data_mut` when the encoding cannot
    /// interpret the assembled data.
    pub fn try_new(
        encoding: EncodingRef,
        dtype: DType,
        metadata: Arc<dyn ArrayMetadata>,
        buffers: Arc<[Buffer]>,
        children: Arc<[ArrayData]>,
    ) -> VortexResult<Self> {
        let candidate = Self {
            encoding,
            dtype,
            metadata,
            buffers,
            children,
        };

        // Run the encoding over the data once with a no-op callback. If this succeeds
        // here, an encoding can later implement `with_data()` infallibly.
        let validation = encoding.with_data_mut(&candidate, &mut |_array| Ok(()));
        validation.map(|()| candidate)
    }
}

impl ArrayData {
    /// Returns the encoding of this array.
    pub fn encoding(&self) -> EncodingRef {
        let Self { encoding, .. } = self;
        *encoding
    }

    /// Returns the dtype of this array.
    pub fn dtype(&self) -> &DType {
        let Self { dtype, .. } = self;
        dtype
    }

    /// Returns the type-erased metadata of this array.
    pub fn metadata(&self) -> &Arc<dyn ArrayMetadata> {
        let Self { metadata, .. } = self;
        metadata
    }

    /// Returns the buffers of this array.
    pub fn buffers(&self) -> &[Buffer] {
        self.buffers.as_ref()
    }

    /// Returns the child arrays of this array.
    pub fn children(&self) -> &[ArrayData] {
        self.children.as_ref()
    }
}

impl ToArray for ArrayData {
/// Borrows this data as an `Array::DataRef` without copying it.
fn to_array(&self) -> Array {
Array::DataRef(self)
}
}

impl IntoArray<'static> for ArrayData {
/// Moves this data into an owned `Array::Data`.
fn into_array(self) -> Array<'static> {
Array::Data(self)
}
}

/// An [`ArrayData`] tagged at the type level with the array definition `D`.
///
/// The tag lets `metadata()` downcast the type-erased metadata to `D::Metadata`.
pub struct TypedArrayData<D: ArrayDef> {
data: ArrayData,
// Zero-sized marker tying this value to `D`; no `D` is actually stored.
phantom: PhantomData<D>,
}

impl<D: ArrayDef> TypedArrayData<D>
where
    Self: for<'a> AsRef<D::Array<'a>>,
{
    /// Wraps `data` without checking that it belongs to `D`.
    ///
    /// The caller is responsible for ensuring that the metadata of `data` is a
    /// `D::Metadata`; otherwise [`Self::metadata`] and [`Self::into_metadata`] panic.
    pub fn new_unchecked(data: ArrayData) -> Self {
        Self {
            data,
            phantom: PhantomData,
        }
    }

    /// Returns the underlying untyped data.
    pub fn data(&self) -> &ArrayData {
        &self.data
    }

    /// Consumes `self` and returns the underlying untyped data.
    pub fn into_data(self) -> ArrayData {
        self.data
    }

    /// Returns the metadata downcast to `D::Metadata`.
    ///
    /// # Panics
    ///
    /// Panics if the stored metadata is not a `D::Metadata`.
    pub fn metadata(&self) -> &D::Metadata {
        self.data
            .metadata()
            .as_any()
            .downcast_ref::<D::Metadata>()
            .expect("TypedArrayData invariant violated: metadata is not D::Metadata")
    }

    /// Consumes `self` and returns the metadata downcast to `Arc<D::Metadata>`.
    ///
    /// # Panics
    ///
    /// Panics if the stored metadata is not a `D::Metadata`.
    pub fn into_metadata(self) -> Arc<D::Metadata> {
        self.data
            .metadata
            .as_any_arc()
            .downcast::<D::Metadata>()
            .expect("TypedArrayData invariant violated: metadata is not D::Metadata")
    }

    /// Returns this data viewed as the typed array `D::Array`.
    pub fn as_array(&self) -> &D::Array<'_> {
        self.as_ref()
    }
}

impl<D: ArrayDef> ToArray for TypedArrayData<D> {
/// Borrows the wrapped data as an `Array::DataRef`; the type tag `D` is dropped.
fn to_array(&self) -> Array {
Array::DataRef(&self.data)
}
}

impl<D: ArrayDef> IntoArray<'static> for TypedArrayData<D> {
/// Moves the wrapped data into an owned `Array::Data`; the type tag `D` is dropped.
fn into_array(self) -> Array<'static> {
Array::Data(self.data)
}
}

impl<D: ArrayDef> TryFrom<ArrayData> for TypedArrayData<D> {
    type Error = VortexError;

    /// Tags `data` with `D` after checking that its encoding id equals `D::ID`.
    ///
    /// Only the encoding id is checked; the metadata type is not inspected here.
    ///
    /// # Errors
    ///
    /// Returns an "Invalid encoding for array" error when the ids differ.
    fn try_from(data: ArrayData) -> Result<Self, Self::Error> {
        let actual_id = data.encoding().id();
        if actual_id != D::ID {
            vortex_bail!("Invalid encoding for array")
        }

        let phantom = PhantomData;
        Ok(Self { data, phantom })
    }
}
69 changes: 69 additions & 0 deletions vortex-array2/src/encoding.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
use std::fmt::{Debug, Formatter};

pub use vortex::encoding::EncodingId;
use vortex_error::VortexResult;

use crate::ArrayView;
use crate::{ArrayData, ArrayTrait};

/// A `'static` reference to a type-erased encoding; a plain reference, so it is `Copy`.
pub type EncodingRef = &'static dyn ArrayEncoding;

/// Dynamic trait representing an array type.
///
/// The `with_*_mut` methods take the callback as `&mut dyn FnMut` and return no value of
/// their own, which keeps the trait usable as `dyn ArrayEncoding`. The inherent helpers
/// `with_view` / `with_data` wrap them to return a value from the callback.
#[allow(dead_code)]
pub trait ArrayEncoding {
/// Returns the identifier of this encoding.
fn id(&self) -> EncodingId;

/// Runs `f` against the array backed by `view`.
///
/// The `with_view` helper assumes `f` has been called whenever this returns `Ok`.
fn with_view_mut<'v>(
&self,
view: &'v ArrayView<'v>,
f: &mut dyn FnMut(&dyn ArrayTrait) -> VortexResult<()>,
) -> VortexResult<()>;

/// Runs `f` against the array backed by `data`.
///
/// `ArrayData::try_new` calls this with a no-op callback to validate the data, and the
/// `with_data` helper assumes `f` has been called whenever this returns `Ok`.
fn with_data_mut(
&self,
data: &ArrayData,
f: &mut dyn FnMut(&dyn ArrayTrait) -> VortexResult<()>,
) -> VortexResult<()>;
}

impl Debug for dyn ArrayEncoding + '_ {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
Debug::fmt(&self.id(), f)
}
}

impl dyn ArrayEncoding {
    /// Calls `f` on the array backed by `view` and returns its result.
    ///
    /// # Panics
    ///
    /// Panics if `with_view_mut` returns an error or never invokes the callback.
    pub(crate) fn with_view<'v, R, F: Fn(&dyn ArrayTrait) -> R>(
        &self,
        view: &'v ArrayView<'v>,
        f: F,
    ) -> R {
        // The dyn-compatible method cannot return `R`, so the value is carried out of
        // the callback through this slot.
        let mut captured: Option<R> = None;
        let outcome = self.with_view_mut(view, &mut |array| {
            captured = Some(f(array));
            Ok(())
        });

        // NOTE(review): the original comment says this cannot fail because the encoding
        // is validated in `ArrayData::try_new`; confirm views get equivalent validation.
        outcome.unwrap();

        // The callback fills the slot, so it is populated once the call succeeded.
        captured.unwrap()
    }

    /// Calls `f` on the array backed by `data` and returns its result.
    ///
    /// # Panics
    ///
    /// Panics if `with_data_mut` returns an error or never invokes the callback.
    pub(crate) fn with_data<R, F: Fn(&dyn ArrayTrait) -> R>(&self, data: &ArrayData, f: F) -> R {
        // The dyn-compatible method cannot return `R`, so the value is carried out of
        // the callback through this slot.
        let mut captured: Option<R> = None;
        let outcome = self.with_data_mut(data, &mut |array| {
            captured = Some(f(array));
            Ok(())
        });

        // Not expected to fail: `ArrayData::try_new` already ran the encoding over the
        // data once as validation.
        outcome.unwrap();

        // The callback fills the slot, so it is populated once the call succeeded.
        captured.unwrap()
    }
}
Loading

0 comments on commit 025e5c7

Please sign in to comment.