Skip to content

Commit

Permalink
(nit) Make csv format instantiation nicer (#228)
Browse files Browse the repository at this point in the history
  • Loading branch information
jdcasale authored Apr 12, 2024
1 parent 705c3af commit f9a3510
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 22 deletions.
23 changes: 4 additions & 19 deletions bench-vortex/src/public_bi_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ use crate::data_downloads::{
 };
 use crate::public_bi_data::PBIDataset::*;
 use crate::reader::{
-compress_csv_to_vortex, default_csv_format, open_vortex, write_csv_as_parquet,
-write_csv_to_vortex,
+compress_csv_to_vortex, open_vortex, pbi_csv_format, write_csv_as_parquet, write_csv_to_vortex,
 };
 use crate::{idempotent, IdempotentPath};

Expand Down Expand Up @@ -440,13 +439,8 @@ impl BenchmarkDataset for BenchmarkDatasets {
 &path_for_file_type(self, output_fname, "parquet"),
 |output_path| {
 let mut write = File::create(output_path).unwrap();
-let delimiter = u8::try_from('|').unwrap();
 let csv_input = f;
-write_csv_as_parquet(
-csv_input,
-default_csv_format().with_delimiter(delimiter),
-&mut write,
-)
+write_csv_as_parquet(csv_input, pbi_csv_format(), &mut write)
 },
 )
 .expect("Failed to compress to parquet");
Expand All @@ -466,11 +460,7 @@ impl BenchmarkDataset for BenchmarkDatasets {
 .into_iter()
 .map(|csv_input| {
 info!("Compressing {} to vortex", csv_input.to_str().unwrap());
-compress_csv_to_vortex(
-csv_input,
-default_csv_format().with_delimiter(u8::try_from('|').unwrap()),
-)
-.1
+compress_csv_to_vortex(csv_input, pbi_csv_format()).1
 })
 .collect_vec()
 }
Expand All @@ -490,13 +480,8 @@ impl BenchmarkDataset for BenchmarkDatasets {
 &path_for_file_type(self, output_fname, "vortex"),
 |output_path| {
 let mut write = File::create(output_path).unwrap();
-let delimiter = u8::try_from('|').unwrap();
 let csv_input = f;
-write_csv_to_vortex(
-csv_input,
-default_csv_format().with_delimiter(delimiter),
-&mut write,
-)
+write_csv_to_vortex(csv_input, pbi_csv_format(), &mut write)
 },
 )
 .expect("Failed to compress to vortex");
Expand Down
5 changes: 2 additions & 3 deletions bench-vortex/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ use crate::{chunks_to_array, compress_ctx};

 pub const BATCH_SIZE: usize = 65_536;
 pub const CSV_SCHEMA_SAMPLE_ROWS: usize = 10_000_000;
-const DEFAULT_DELIMITER: u8 = b',';

 pub fn open_vortex(path: &Path) -> VortexResult<ArrayRef> {
 let mut file = File::open(path)?;
Expand Down Expand Up @@ -81,9 +80,9 @@ fn compress_parquet_to_vortex(parquet_path: &Path) -> Result<(DType, ChunkedArra
 Ok((dtype, chunked))
 }

-pub fn default_csv_format() -> Format {
+pub fn pbi_csv_format() -> Format {
 Format::default()
-.with_delimiter(DEFAULT_DELIMITER)
+.with_delimiter(b'|')
 .with_header(false)
 .with_null_regex("null".parse().unwrap())
 }
Expand Down

0 comments on commit f9a3510

Please sign in to comment.