Skip to content

Commit

Permalink
rename
Browse files Browse the repository at this point in the history
  • Loading branch information
robert3005 committed Mar 20, 2024
1 parent 2d2a656 commit 9040800
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 122 deletions.
29 changes: 16 additions & 13 deletions bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
use std::collections::HashSet;
use std::fs::{create_dir_all, File};
use std::path::{Path, PathBuf};
use std::sync::Arc;

use arrow_array::RecordBatchReader;
use itertools::Itertools;
use log::info;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use parquet::arrow::ProjectionMask;
use std::collections::HashSet;
use std::fs::{create_dir_all, File};
use std::path::{Path, PathBuf};
use std::sync::Arc;

use vortex::array::bool::BoolEncoding;
use vortex::array::chunked::{ChunkedArray, ChunkedEncoding};
use vortex::array::constant::ConstantEncoding;

use vortex::array::composite::CompositeEncoding;
use vortex::array::constant::ConstantEncoding;
use vortex::array::downcast::DowncastArrayBuiltin;
use vortex::array::primitive::PrimitiveEncoding;
use vortex::array::sparse::SparseEncoding;
Expand Down Expand Up @@ -117,7 +118,7 @@ pub fn compress_taxi_data() -> ArrayRef {
})
.collect_vec();

let dtype = DType::from_arrow_type(schema.clone());
let dtype = DType::from_arrow(schema.clone());
let compressed = ChunkedArray::new(chunks.clone(), dtype).boxed();

info!("Compressed array {}", display_tree(compressed.as_ref()));
Expand All @@ -143,13 +144,15 @@ pub fn compress_taxi_data() -> ArrayRef {

#[cfg(test)]
mod test {
use std::fs::File;
use std::ops::Deref;
use std::sync::Arc;

use arrow_array::{ArrayRef as ArrowArrayRef, StructArray as ArrowStructArray};
use log::LevelFilter;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use simplelog::{ColorChoice, Config, TermLogger, TerminalMode};
use std::fs::File;
use std::ops::Deref;
use std::sync::Arc;

use vortex::array::ArrayRef;
use vortex::compute::as_arrow::as_arrow;
use vortex::encode::FromArrowArray;
Expand Down Expand Up @@ -185,7 +188,7 @@ mod test {
for record_batch in reader.map(|batch_result| batch_result.unwrap()) {
let struct_arrow: ArrowStructArray = record_batch.into();
let arrow_array: ArrowArrayRef = Arc::new(struct_arrow);
let vortex_array = ArrayRef::from_arrow_array(arrow_array.clone(), false);
let vortex_array = ArrayRef::from_arrow(arrow_array.clone(), false);

let mut buf = Vec::<u8>::new();
let mut write_ctx = WriteCtx::new(&mut buf);
Expand All @@ -207,7 +210,7 @@ mod test {
for record_batch in reader.map(|batch_result| batch_result.unwrap()) {
let struct_arrow: ArrowStructArray = record_batch.into();
let arrow_array: ArrowArrayRef = Arc::new(struct_arrow);
let vortex_array = ArrayRef::from_arrow_array(arrow_array.clone(), false);
let vortex_array = ArrayRef::from_arrow(arrow_array.clone(), false);
let vortex_as_arrow = as_arrow(vortex_array.as_ref()).unwrap();
assert_eq!(vortex_as_arrow.deref(), arrow_array.deref());
}
Expand All @@ -226,7 +229,7 @@ mod test {
for record_batch in reader.map(|batch_result| batch_result.unwrap()) {
let struct_arrow: ArrowStructArray = record_batch.into();
let arrow_array: ArrowArrayRef = Arc::new(struct_arrow);
let vortex_array = ArrayRef::from_arrow_array(arrow_array.clone(), false);
let vortex_array = ArrayRef::from_arrow(arrow_array.clone(), false);

let compressed = ctx.clone().compress(vortex_array.as_ref(), None).unwrap();
let compressed_as_arrow = as_arrow(compressed.as_ref()).unwrap();
Expand Down
2 changes: 1 addition & 1 deletion pyvortex/src/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ impl PyDType {
) -> PyResult<Py<Self>> {
PyDType::wrap(
cls.py(),
DType::from_arrow_type(&Field::new("_", arrow_dtype, nullable)),
DType::from_arrow(&Field::new("_", arrow_dtype, nullable)),
)
}
}
Expand Down
8 changes: 4 additions & 4 deletions pyvortex/src/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pub fn encode(obj: &PyAny) -> PyResult<Py<PyArray>> {

if obj.is_instance(pa_array)? {
let arrow_array = ArrayData::from_pyarrow(obj).map(make_array)?;
let enc_array = ArrayRef::from_arrow_array(arrow_array, false);
let enc_array = ArrayRef::from_arrow(arrow_array, false);
PyArray::wrap(obj.py(), enc_array)
} else if obj.is_instance(chunked_array)? {
let chunks: Vec<&PyAny> = obj.getattr("chunks")?.extract()?;
Expand All @@ -34,17 +34,17 @@ pub fn encode(obj: &PyAny) -> PyResult<Py<PyArray>> {
.map(|a| {
ArrayData::from_pyarrow(a)
.map(make_array)
.map(|a| ArrayRef::from_arrow_array(a, false))
.map(|a| ArrayRef::from_arrow(a, false))
})
.collect::<PyResult<Vec<ArrayRef>>>()?;
let dtype: DType = obj
.getattr("type")
.and_then(DataType::from_pyarrow)
.map(|dt| DType::from_arrow_type(&Field::new("_", dt, false)))?;
.map(|dt| DType::from_arrow(&Field::new("_", dt, false)))?;
PyArray::wrap(obj.py(), ChunkedArray::new(encoded_chunks, dtype).boxed())
} else if obj.is_instance(table)? {
let array_stream = ArrowArrayStreamReader::from_pyarrow(obj)?;
let dtype = DType::from_arrow_type(array_stream.schema());
let dtype = DType::from_arrow(array_stream.schema());
let chunks = array_stream
.into_iter()
.map(|b| b.map(ArrayRef::from).map_err(map_arrow_err))
Expand Down
3 changes: 2 additions & 1 deletion vortex-array/src/array/composite/typed.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::fmt::Debug;
use std::sync::Arc;

use vortex_schema::CompositeID;

use crate::array::composite::array::CompositeArray;
use crate::array::composite::CompositeMetadata;
use crate::array::{Array, ArrayRef};
Expand Down Expand Up @@ -95,4 +97,3 @@ macro_rules! composite_impl {
}

pub(crate) use composite_impl;
use vortex_schema::CompositeID;
19 changes: 7 additions & 12 deletions vortex-array/src/arrow/dtypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,8 @@ impl From<RecordBatch> for ArrayRef {
.map(|(array, field)| {
// The dtype of the child arrays infer their nullability from the array itself.
// In case the schema says something different, we cast into the schema's dtype.
let vortex_array =
ArrayRef::from_arrow_array(array.clone(), field.is_nullable());
cast(
vortex_array.as_ref(),
&DType::from_arrow_type(field.as_ref()),
)
.unwrap()
let vortex_array = ArrayRef::from_arrow(array.clone(), field.is_nullable());
cast(vortex_array.as_ref(), &DType::from_arrow(field.as_ref())).unwrap()
})
.collect(),
)
Expand Down Expand Up @@ -75,7 +70,7 @@ impl TryFrom<&DataType> for PType {
}

impl FromArrowType<SchemaRef> for DType {
fn from_arrow_type(value: SchemaRef) -> Self {
fn from_arrow(value: SchemaRef) -> Self {
DType::Struct(
value
.fields()
Expand All @@ -85,14 +80,14 @@ impl FromArrowType<SchemaRef> for DType {
value
.fields()
.iter()
.map(|f| DType::from_arrow_type(f.as_ref()))
.map(|f| DType::from_arrow(f.as_ref()))
.collect_vec(),
)
}
}

impl FromArrowType<&Field> for DType {
fn from_arrow_type(field: &Field) -> Self {
fn from_arrow(field: &Field) -> Self {
use vortex_schema::DType::*;
use vortex_schema::Signedness::*;

Expand Down Expand Up @@ -123,12 +118,12 @@ impl FromArrowType<&Field> for DType {
// DataType::Time32(u) => localtime(u.into(), IntWidth::_32, nullability),
// DataType::Time64(u) => localtime(u.into(), IntWidth::_64, nullability),
DataType::List(e) | DataType::LargeList(e) => {
List(Box::new(DType::from_arrow_type(e.as_ref())), nullability)
List(Box::new(DType::from_arrow(e.as_ref())), nullability)
}
DataType::Struct(f) => Struct(
f.iter().map(|f| Arc::new(f.name().clone())).collect(),
f.iter()
.map(|f| DType::from_arrow_type(f.as_ref()))
.map(|f| DType::from_arrow(f.as_ref()))
.collect_vec(),
),
DataType::Decimal128(p, s) | DataType::Decimal256(p, s) => Decimal(*p, *s, nullability),
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/arrow/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ pub mod dtypes;
pub mod wrappers;

pub trait FromArrowType<T>: Sized {
fn from_arrow_type(value: T) -> Self;
fn from_arrow(value: T) -> Self;
}
Loading

0 comments on commit 9040800

Please sign in to comment.