From f9a35100bc50bce95230ac2e789e3cb2a58f3d3f Mon Sep 17 00:00:00 2001 From: Josh Casale Date: Fri, 12 Apr 2024 09:59:21 +0100 Subject: [PATCH] (nit) Make csv format instantiation nicer (#228) --- bench-vortex/src/public_bi_data.rs | 23 ++++------------------- bench-vortex/src/reader.rs | 5 ++--- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/bench-vortex/src/public_bi_data.rs b/bench-vortex/src/public_bi_data.rs index 3f184d122a..c480d08e01 100644 --- a/bench-vortex/src/public_bi_data.rs +++ b/bench-vortex/src/public_bi_data.rs @@ -18,8 +18,7 @@ use crate::data_downloads::{ }; use crate::public_bi_data::PBIDataset::*; use crate::reader::{ - compress_csv_to_vortex, default_csv_format, open_vortex, write_csv_as_parquet, - write_csv_to_vortex, + compress_csv_to_vortex, open_vortex, pbi_csv_format, write_csv_as_parquet, write_csv_to_vortex, }; use crate::{idempotent, IdempotentPath}; @@ -440,13 +439,8 @@ impl BenchmarkDataset for BenchmarkDatasets { &path_for_file_type(self, output_fname, "parquet"), |output_path| { let mut write = File::create(output_path).unwrap(); - let delimiter = u8::try_from('|').unwrap(); let csv_input = f; - write_csv_as_parquet( - csv_input, - default_csv_format().with_delimiter(delimiter), - &mut write, - ) + write_csv_as_parquet(csv_input, pbi_csv_format(), &mut write) }, ) .expect("Failed to compress to parquet"); @@ -466,11 +460,7 @@ impl BenchmarkDataset for BenchmarkDatasets { .into_iter() .map(|csv_input| { info!("Compressing {} to vortex", csv_input.to_str().unwrap()); - compress_csv_to_vortex( - csv_input, - default_csv_format().with_delimiter(u8::try_from('|').unwrap()), - ) - .1 + compress_csv_to_vortex(csv_input, pbi_csv_format()).1 }) .collect_vec() } @@ -490,13 +480,8 @@ impl BenchmarkDataset for BenchmarkDatasets { &path_for_file_type(self, output_fname, "vortex"), |output_path| { let mut write = File::create(output_path).unwrap(); - let delimiter = u8::try_from('|').unwrap(); let csv_input = f; - write_csv_to_vortex( - csv_input, - default_csv_format().with_delimiter(delimiter), - &mut write, - ) + write_csv_to_vortex(csv_input, pbi_csv_format(), &mut write) }, ) .expect("Failed to compress to vortex"); diff --git a/bench-vortex/src/reader.rs b/bench-vortex/src/reader.rs index 775747ff57..17678c5739 100644 --- a/bench-vortex/src/reader.rs +++ b/bench-vortex/src/reader.rs @@ -38,7 +38,6 @@ use crate::{chunks_to_array, compress_ctx}; pub const BATCH_SIZE: usize = 65_536; pub const CSV_SCHEMA_SAMPLE_ROWS: usize = 10_000_000; -const DEFAULT_DELIMITER: u8 = b','; pub fn open_vortex(path: &Path) -> VortexResult { let mut file = File::open(path)?; @@ -81,9 +80,9 @@ fn compress_parquet_to_vortex(parquet_path: &Path) -> Result<(DType, ChunkedArra Ok((dtype, chunked)) } -pub fn default_csv_format() -> Format { +pub fn pbi_csv_format() -> Format { Format::default() - .with_delimiter(DEFAULT_DELIMITER) + .with_delimiter(b'|') .with_header(false) .with_null_regex("null".parse().unwrap()) }