Skip to content

Commit

Permalink
Add a lance benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
gatesn committed Mar 27, 2024
1 parent 058752a commit c5efd2b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 26 deletions.
8 changes: 2 additions & 6 deletions bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,10 @@ use vortex_schema::DType;
pub mod reader;
pub mod taxi_data;

pub fn idempotent<T, E>(
name: &str,
f: impl FnOnce(&mut File) -> Result<T, E>,
) -> Result<PathBuf, E> {
pub fn idempotent<T, E>(name: &str, f: impl FnOnce(&Path) -> Result<T, E>) -> Result<PathBuf, E> {
let path = data_path(name);
if !path.exists() {
let mut file = File::create(&path).unwrap();
f(&mut file)?;
f(&path)?;
}
Ok(path.to_path_buf())
}
Expand Down
41 changes: 21 additions & 20 deletions bench-vortex/src/taxi_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,17 @@ use vortex::arrow::FromArrowType;
use vortex::serde::WriteCtx;
use vortex_schema::DType;

use crate::idempotent;
use crate::reader::compress_vortex;
use crate::{data_path, idempotent};

fn download_taxi_data() -> PathBuf {
idempotent("yellow-tripdata-2023-11.parquet", |file| {
idempotent("yellow-tripdata-2023-11.parquet", |path| {
let mut file = File::create(path).unwrap();
reqwest::blocking::get(
"https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-11.parquet",
)
.unwrap()
.copy_to(file)
.copy_to(&mut file)
})
.unwrap()
}
Expand All @@ -34,28 +35,26 @@ pub fn taxi_data_parquet() -> PathBuf {
}

pub fn taxi_data_lance() -> PathBuf {
let write_params = WriteParams::default();
idempotent("taxi.lance", |path| {
let write_params = WriteParams::default();

let read = File::open(taxi_data_parquet()).unwrap();
let reader = LanceParquetRecordBatchReaderBuilder::try_new(read)
.unwrap()
.build()
.unwrap();
let read = File::open(taxi_data_parquet()).unwrap();
let reader = LanceParquetRecordBatchReaderBuilder::try_new(read)
.unwrap()
.build()
.unwrap();

let lance_path = data_path("taxi.lance");
Runtime::new()
.unwrap()
.block_on(Dataset::write(
Runtime::new().unwrap().block_on(Dataset::write(
reader,
lance_path.as_os_str().to_str().unwrap(),
path.to_str().unwrap(),
Some(write_params),
))
.unwrap();
lance_path
})
.unwrap()
}

pub fn taxi_data_vortex() -> PathBuf {
idempotent("taxi-uncompressed.vortex", |write| {
idempotent("taxi-uncompressed.vortex", |path| {
let taxi_pq = File::open(download_taxi_data()).unwrap();
let builder = ParquetRecordBatchReaderBuilder::try_new(taxi_pq).unwrap();

Expand All @@ -70,16 +69,18 @@ pub fn taxi_data_vortex() -> PathBuf {
.collect_vec();
let chunked = ChunkedArray::new(chunks, dtype.clone());

let mut write_ctx = WriteCtx::new(write);
let mut write = File::create(path).unwrap();
let mut write_ctx = WriteCtx::new(&mut write);
write_ctx.dtype(&dtype)?;
write_ctx.write(&chunked)
})
.unwrap()
}

pub fn taxi_data_vortex_compressed() -> PathBuf {
idempotent("taxi.vortex", |write| {
compress_vortex(&taxi_data_parquet(), write)
idempotent("taxi.vortex", |path| {
let mut write = File::create(path).unwrap();
compress_vortex(&taxi_data_parquet(), &mut write)
})
.unwrap()
}

0 comments on commit c5efd2b

Please sign in to comment.