From aae1f711be6d34e15d10d7d8ba396b3ab326b9d5 Mon Sep 17 00:00:00 2001 From: Seth Date: Thu, 15 Jul 2021 09:17:24 -0600 Subject: [PATCH] [bugfix] issue 26 (#27) * Fix issue 26 * update changelog * Clippy * Maybe fix deb name --- .github/workflows/publish.yml | 2 +- CHANGELOG.md | 4 +- Cargo.lock | 8 +- src/lib/core.rs | 215 +++++++++++++++++++++++++------ src/lib/field_range.rs | 41 ++++-- src/main.rs | 235 ++++++++++++++++++---------------- 6 files changed, 337 insertions(+), 168 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index d7e0a96..314817c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -71,7 +71,7 @@ jobs: cargo install cargo-deb RUSTFLAGS="-Cllvm-args=-pgo-warn-missing-function -Cprofile-use=$(pwd)/pgo-data/merged.profdata" cargo deb deb_path=$(find ./target/debian/ -type f -name 'hck*') - asset_path="./${{ matrix.asset_name }}.deb" + asset_path="${{ matrix.asset_name }}.deb" mv "${deb_path}" "${asset_path}" echo "DEB=${asset_path}" >> $GITHUB_ENV fi diff --git a/CHANGELOG.md b/CHANGELOG.md index cd854eb..6c97246 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,10 @@ # Changelog -## v0.5.2-alpha (in progress) +## v0.5.2 - [PR24](https://github.com/sstadick/hck/pull/24) Removed the now defunct profile guided optimization shell scripts and all references to them in favor of the `justfile` that was added in `v0.5.0` +- [Bugfix](https://github.com/sstadick/hck/issues/26) fixes incorrect handling of header line for non-stdin inputs, fixes incorrect parsing of last header fields (now strips newline before matching), fixes option parsing so that the `-F` and `-E` options won't try to consume the positional input arguments. Huge thanks to @learnbyexample for their detailed bug report. +- Change: An error will now be raised when a specified header is not found. This differs from the convention used by the selection-by-index, which tries to match `cut`. 
The reasoning is that it is generally harder to type out each header field and if a header is not found you want to know about it. ## v0.5.1 diff --git a/Cargo.lock b/Cargo.lock index a00b884..80ebed8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -573,9 +573,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "structopt" -version = "0.3.21" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5277acd7ee46e63e5168a80734c9f6ee81b1367a7d8772a2d765df2a3705d28c" +checksum = "69b041cdcb67226aca307e6e7be44c8806423d83e018bd662360a93dabce4d71" dependencies = [ "clap", "lazy_static", @@ -584,9 +584,9 @@ dependencies = [ [[package]] name = "structopt-derive" -version = "0.4.14" +version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ba9cdfda491b814720b6b06e0cac513d922fc407582032e8706e9f137976f90" +checksum = "7813934aecf5f51a54775e00068c237de98489463968231a51746bbbc03f9c10" dependencies = [ "heck", "proc-macro-error", diff --git a/src/lib/core.rs b/src/lib/core.rs index 25beff0..3e599df 100644 --- a/src/lib/core.rs +++ b/src/lib/core.rs @@ -4,10 +4,16 @@ //! lifetime coersion to reuse the `shuffler` vector really locks down the possible options. //! //! If we go with a dyn trait on the line splitter function it is appreciably slower. -use crate::{field_range::FieldRange, line_parser::LineParser, mmap::MmapChoice}; +use crate::{ + field_range::{FieldRange, RegexOrStr}, + line_parser::LineParser, + mmap::MmapChoice, +}; +use anyhow::Result; use bstr::ByteSlice; use grep_cli::{DecompressionMatcherBuilder, DecompressionReaderBuilder}; use memchr; +use regex::bytes::Regex; use ripline::{ line_buffer::{LineBuffer, LineBufferReader}, lines::{self, LineIter}, @@ -28,28 +34,8 @@ pub enum HckInput> { Path(P), } -impl> HckInput

{ - /// Read the first line of an input and return it. - /// - /// It's up to the user to make sure that any consumed bytes are properly handed - /// off to the line parsers later on. - pub fn peek_first_line(&self) -> Result { - let mut buffer = String::new(); - match self { - HckInput::Stdin => { - io::stdin().read_line(&mut buffer)?; - } - - HckInput::Path(path) => { - BufReader::new(File::open(path)?).read_line(&mut buffer)?; - } - } - Ok(buffer) - } -} - /// The config object for [`Core`]. -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone)] pub struct CoreConfig<'a> { delimiter: &'a [u8], output_delimiter: &'a [u8], @@ -57,6 +43,12 @@ pub struct CoreConfig<'a> { mmap_choice: MmapChoice, is_parser_regex: bool, try_decompress: bool, + raw_fields: Option<&'a str>, + raw_header_fields: Option<&'a [Regex]>, + raw_exclude: Option<&'a str>, + raw_exclude_headers: Option<&'a [Regex]>, + header_is_regex: bool, + parsed_delim: RegexOrStr<'a>, } impl<'a> Default for CoreConfig<'a> { @@ -68,24 +60,128 @@ impl<'a> Default for CoreConfig<'a> { mmap_choice: unsafe { MmapChoice::auto() }, is_parser_regex: false, try_decompress: false, + raw_fields: Some("1-"), + raw_header_fields: None, + raw_exclude: None, + raw_exclude_headers: None, + header_is_regex: false, + parsed_delim: RegexOrStr::Str(DEFAULT_DELIM.to_str().unwrap()), } } } impl<'a> CoreConfig<'a> { - #[inline] - pub fn is_parser_regex(&self) -> bool { - self.is_parser_regex + /// Get the parsed delimiter + pub fn parsed_delim(&self) -> &RegexOrStr<'a> { + &self.parsed_delim } - #[inline] - pub fn delimiter(&self) -> &[u8] { - self.delimiter + /// Read the first line of an input and return it. + /// + /// It's up to the user to make sure that any consumed bytes are properly handed + /// off to the line parsers later on. + pub fn peek_first_line>( + &self, + input: &HckInput

, + ) -> Result, io::Error> { + let mut buffer = String::new(); + match input { + HckInput::Stdin => { + io::stdin().read_line(&mut buffer)?; + } + + HckInput::Path(path) => { + if self.try_decompress { + let mut reader = + BufReader::new(DecompressionReaderBuilder::new().build(&path)?); + reader.read_line(&mut buffer)?; + } else { + BufReader::new(File::open(path)?).read_line(&mut buffer)?; + } + } + } + Ok(lines::without_terminator(buffer.as_bytes(), self.line_terminator).to_owned()) + } + + /// Parse the raw user input fields and header fields. Returns any header bytes read and the parsed fields + pub fn parse_fields

(&self, input: &HckInput

) -> Result<(Option>, Vec)> + where + P: AsRef, + { + // Parser the fields in the context of the files being looked at + let (mut extra, fields) = match (self.raw_fields, self.raw_header_fields) { + (Some(field_list), Some(header_fields)) => { + let first_line = self.peek_first_line(&input)?; + let mut fields = FieldRange::from_list(field_list)?; + let header_fields = FieldRange::from_header_list( + header_fields, + first_line.as_bytes(), + &self.parsed_delim, + self.header_is_regex, + )?; + fields.extend(header_fields.into_iter()); + FieldRange::post_process_ranges(&mut fields); + (Some(first_line), fields) + } + (Some(field_list), None) => (None, FieldRange::from_list(field_list)?), + (None, Some(header_fields)) => { + let first_line = self.peek_first_line(&input)?; + let fields = FieldRange::from_header_list( + header_fields, + first_line.as_bytes(), + &self.parsed_delim, + self.header_is_regex, + )?; + (Some(first_line), fields) + } + (None, None) => (None, FieldRange::from_list("1-")?), + }; + + let fields = match (&self.raw_exclude, &self.raw_exclude_headers) { + (Some(exclude), Some(exclude_header)) => { + let exclude = FieldRange::from_list(exclude)?; + let fields = FieldRange::exclude(fields, exclude); + let first_line = if let Some(first_line) = extra { + first_line + } else { + self.peek_first_line(&input)? + }; + let exclude_headers = FieldRange::from_header_list( + &exclude_header, + first_line.as_bytes(), + &self.parsed_delim, + self.header_is_regex, + )?; + extra = Some(first_line); + FieldRange::exclude(fields, exclude_headers) + } + (Some(exclude), None) => { + let exclude = FieldRange::from_list(exclude)?; + FieldRange::exclude(fields, exclude) + } + (None, Some(exclude_header)) => { + let first_line = if let Some(first_line) = extra { + first_line + } else { + self.peek_first_line(&input)? 
+ }; + let exclude_headers = FieldRange::from_header_list( + &exclude_header, + first_line.as_bytes(), + &self.parsed_delim, + self.header_is_regex, + )?; + extra = Some(first_line); + FieldRange::exclude(fields, exclude_headers) + } + (None, None) => fields, + }; + Ok((extra, fields)) } } /// A builder for the [`CoreConfig`] which drives [`Core`]. -#[derive(Copy, Clone, Debug)] +#[derive(Clone, Debug)] pub struct CoreConfigBuilder<'a> { config: CoreConfig<'a>, } @@ -97,45 +193,82 @@ impl<'a> CoreConfigBuilder<'a> { } } - pub fn build(self) -> CoreConfig<'a> { - self.config + pub fn build(mut self) -> Result> { + let delim = if self.config.is_parser_regex { + RegexOrStr::Regex(Regex::new(self.config.delimiter.to_str()?)?) + } else { + RegexOrStr::Str(self.config.delimiter.to_str()?) + }; + self.config.parsed_delim = delim; + Ok(self.config) } /// The substr to split lines on. - pub fn delimiter(&mut self, delim: &'a [u8]) -> &mut Self { + pub fn delimiter(mut self, delim: &'a [u8]) -> Self { self.config.delimiter = delim; self } /// The substr to use as the output delimiter - pub fn output_delimiter(&mut self, delim: &'a [u8]) -> &mut Self { + pub fn output_delimiter(mut self, delim: &'a [u8]) -> Self { self.config.output_delimiter = delim; self } /// The line terminator to use when looking for linebreaks and stripping linebreach chars. 
- pub fn line_terminator(&mut self, term: LineTerminator) -> &mut Self { + pub fn line_terminator(mut self, term: LineTerminator) -> Self { self.config.line_terminator = term; self } /// Whether or not to try to use mmap mode - pub fn mmap(&mut self, mmap_choice: MmapChoice) -> &mut Self { + pub fn mmap(mut self, mmap_choice: MmapChoice) -> Self { self.config.mmap_choice = mmap_choice; self } /// Whether or not the parser is a regex - pub fn is_regex_parser(&mut self, is_regex: bool) -> &mut Self { + #[allow(clippy::wrong_self_convention)] + pub fn is_regex_parser(mut self, is_regex: bool) -> Self { self.config.is_parser_regex = is_regex; self } /// Try to decompress an input file - pub fn try_decompress(&mut self, try_decompress: bool) -> &mut Self { + pub fn try_decompress(mut self, try_decompress: bool) -> Self { self.config.try_decompress = try_decompress; self } + + /// The raw user input fields to output + pub fn fields(mut self, fields: Option<&'a str>) -> Self { + self.config.raw_fields = fields; + self + } + + /// The raw user input header to output + pub fn headers(mut self, headers: Option<&'a [Regex]>) -> Self { + self.config.raw_header_fields = headers; + self + } + + /// The raw user input fields to exclude + pub fn exclude(mut self, exclude: Option<&'a str>) -> Self { + self.config.raw_exclude = exclude; + self + } + + /// The raw user input headers to exclude + pub fn exclude_headers(mut self, exclude_headers: Option<&'a [Regex]>) -> Self { + self.config.raw_exclude_headers = exclude_headers; + self + } + + /// Whether or not to treat the headers as regex + pub fn header_is_regex(mut self, header_is_regex: bool) -> Self { + self.config.header_is_regex = header_is_regex; + self + } } impl<'a> Default for CoreConfigBuilder<'a> { @@ -203,18 +336,18 @@ where &mut self, input: HckInput

, mut output: W, - header: Option, + header: Option>, ) -> Result<(), io::Error> where P: AsRef, W: Write, { - if let Some(header) = header { - self.hck_bytes(header.as_bytes(), &mut output)?; - } // Dispatch to a given `hck_*` runner depending on configuration match input { HckInput::Stdin => { + if let Some(header) = header { + self.hck_bytes(header.as_bytes(), &mut output)?; + } let reader = io::stdin(); if self.allow_fastmode() { self.hck_reader_fast(reader, &mut output) diff --git a/src/lib/field_range.rs b/src/lib/field_range.rs index 3f74719..d3c8284 100644 --- a/src/lib/field_range.rs +++ b/src/lib/field_range.rs @@ -13,8 +13,10 @@ use thiserror::Error; const MAX: usize = usize::MAX; /// Errors for parsing / validating [`FieldRange`] strings. -#[derive(Error, Debug)] +#[derive(Error, Debug, PartialEq)] pub enum FieldError { + #[error("Header not found: {0}")] + HeaderNotFound(String), #[error("Fields and positions are numbered from 1: {0}")] InvalidField(usize), #[error("High end of range less than low end of range: {0}-{1}")] @@ -25,7 +27,7 @@ pub enum FieldError { NoHeadersMatched, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum RegexOrStr<'b> { Regex(Regex), Str(&'b str), @@ -153,10 +155,12 @@ impl FieldRange { header_is_regex: bool, ) -> Result, FieldError> { let mut ranges = vec![]; + let mut found = vec![false; list.len()]; for (i, header) in delim.split(header).enumerate() { for (j, regex) in list.iter().enumerate() { if !header_is_regex { if regex.as_str().as_bytes() == header { + found[j] = true; ranges.push(FieldRange { low: i, high: i, @@ -164,6 +168,7 @@ impl FieldRange { }); } } else if regex.is_match(header) { + found[j] = true; ranges.push(FieldRange { low: i, high: i, @@ -176,6 +181,11 @@ impl FieldRange { if ranges.is_empty() { return Err(FieldError::NoHeadersMatched); } + for (i, was_found) in found.into_iter().enumerate() { + if !was_found { + return Err(FieldError::HeaderNotFound(list[i].as_str().to_owned())); + } + } 
FieldRange::post_process_ranges(&mut ranges); @@ -364,23 +374,36 @@ mod test { let header = b"is_cat-is-isdog-wascow-was_is_apple-12345-!$%*(_)"; let delim = Regex::new("-").unwrap(); let delim = RegexOrStr::Regex(delim); - let header_fields = vec![ - Regex::new(r"^is_.*$").unwrap(), - Regex::new("dog").unwrap(), - Regex::new(r"\$%").unwrap(), - Regex::new(r"is").unwrap(), - ]; + let header_fields = vec![Regex::new(r"is").unwrap()]; let fields = FieldRange::from_header_list(&header_fields, header, &delim, false).unwrap(); assert_eq!( vec![FieldRange { low: 1, high: 1, - pos: 3 + pos: 0 },], fields ); } + #[test] + fn test_parse_header_fields_literal_header_not_found() { + let header = b"is_cat-is-isdog-wascow-was_is_apple-12345-!$%*(_)"; + let delim = Regex::new("-").unwrap(); + let delim = RegexOrStr::Regex(delim); + let header_fields = vec![ + Regex::new(r"^is_.*$").unwrap(), + Regex::new("dog").unwrap(), + Regex::new(r"\$%").unwrap(), + Regex::new(r"is").unwrap(), + ]; + let result = FieldRange::from_header_list(&header_fields, header, &delim, false); + assert_eq!( + result.unwrap_err(), + FieldError::HeaderNotFound(String::from(r"^is_.*$")) + ); + } + #[test] #[rustfmt::skip::macros(assert_eq)] fn test_exclude_simple() { diff --git a/src/main.rs b/src/main.rs index 8aa5199..1df2a0e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,9 @@ use anyhow::{Context, Error, Result}; -use bstr::ByteSlice; use env_logger::Env; use grep_cli::stdout; use hcklib::{ core::{Core, CoreConfig, CoreConfigBuilder, HckInput}, - field_range::{FieldRange, RegexOrStr}, + field_range::RegexOrStr, line_parser::{RegexLineParser, SubStrLineParser}, mmap::MmapChoice, }; @@ -146,12 +145,12 @@ struct Opts { /// Headers to exclude from the output, ex: '^badfield.*$`. This is a string literal by default. /// Add the `-r` flag to treat as a regex. 
- #[structopt(short = "E", long)] + #[structopt(short = "E", long, multiple = true, number_of_values = 1)] exclude_header: Option>, /// A string literal or regex to select headers, ex: '^is_.*$`. This is a string literal /// by default. add the `-r` flag to treat it as a regex. - #[structopt(short = "F", long)] + #[structopt(short = "F", long, multiple = true, number_of_values = 1)] header_field: Option>, /// Treat the header_fields as regexs instead of string literals @@ -200,24 +199,30 @@ fn main() -> Result<()> { } else { LineTerminator::default() }; - conf_builder.line_terminator(line_term); + conf_builder = conf_builder.line_terminator(line_term); let mmap = if opts.no_mmap { MmapChoice::never() } else { unsafe { MmapChoice::auto() } }; - conf_builder.mmap(mmap); - conf_builder.delimiter(&opts.delimiter.as_bytes()); - conf_builder.output_delimiter(&opts.output_delimiter.as_bytes()); - conf_builder.is_regex_parser(!opts.delim_is_literal); - conf_builder.try_decompress(opts.try_decompress); - let conf = conf_builder.build(); + let conf = conf_builder + .mmap(mmap) + .delimiter(&opts.delimiter.as_bytes()) + .output_delimiter(&opts.output_delimiter.as_bytes()) + .is_regex_parser(!opts.delim_is_literal) + .try_decompress(opts.try_decompress) + .fields(opts.fields.as_deref()) + .headers(opts.header_field.as_deref()) + .exclude(opts.exclude.as_deref()) + .exclude_headers(opts.exclude_header.as_deref()) + .header_is_regex(opts.header_is_regex) + .build()?; let mut line_buffer = LineBufferBuilder::new().build(); for input in inputs.into_iter() { - if let Err(err) = run(input, &mut writer, &opts, conf, &mut line_buffer) { + if let Err(err) = run(input, &mut writer, &conf, &mut line_buffer) { if is_broken_pipe(&err) { exit(0) } @@ -232,96 +237,21 @@ fn main() -> Result<()> { fn run( input: HckInput, writer: &mut W, - opts: &Opts, - conf: CoreConfig, + conf: &CoreConfig, line_buffer: &mut LineBuffer, ) -> Result<()> { let writer = BufWriter::new(writer); - let delim = if 
conf.is_parser_regex() { - RegexOrStr::Regex(Regex::new(conf.delimiter().to_str()?)?) - } else { - RegexOrStr::Str(conf.delimiter().to_str()?) - }; - - // Parser the fields in the context of the files being looked at - let (mut extra, fields) = match (&opts.fields, &opts.header_field) { - (Some(field_list), Some(header_fields)) => { - let first_line = input.peek_first_line()?; - let mut fields = FieldRange::from_list(field_list)?; - let header_fields = FieldRange::from_header_list( - header_fields, - first_line.as_bytes(), - &delim, - opts.header_is_regex, - )?; - fields.extend(header_fields.into_iter()); - FieldRange::post_process_ranges(&mut fields); - (Some(first_line), fields) - } - (Some(field_list), None) => (None, FieldRange::from_list(field_list)?), - (None, Some(header_fields)) => { - let first_line = input.peek_first_line()?; - let fields = FieldRange::from_header_list( - header_fields, - first_line.as_bytes(), - &delim, - opts.header_is_regex, - )?; - (Some(first_line), fields) - } - (None, None) => (None, FieldRange::from_list("1-")?), - }; - - let fields = match (&opts.exclude, &opts.exclude_header) { - (Some(exclude), Some(exclude_header)) => { - let exclude = FieldRange::from_list(exclude)?; - let fields = FieldRange::exclude(fields, exclude); - let first_line = if let Some(first_line) = extra { - first_line - } else { - input.peek_first_line()? - }; - let exclude_headers = FieldRange::from_header_list( - &exclude_header, - first_line.as_bytes(), - &delim, - opts.header_is_regex, - )?; - extra = Some(first_line); - FieldRange::exclude(fields, exclude_headers) - } - (Some(exclude), None) => { - let exclude = FieldRange::from_list(exclude)?; - FieldRange::exclude(fields, exclude) - } - (None, Some(exclude_header)) => { - let first_line = if let Some(first_line) = extra { - first_line - } else { - input.peek_first_line()? 
- }; - let exclude_headers = FieldRange::from_header_list( - &exclude_header, - first_line.as_bytes(), - &delim, - opts.header_is_regex, - )?; - extra = Some(first_line); - FieldRange::exclude(fields, exclude_headers) - } - (None, None) => fields, - }; - + let (extra, fields) = conf.parse_fields(&input)?; // No point processing empty fields if fields.is_empty() { return Ok(()); } - match &delim { + match conf.parsed_delim() { RegexOrStr::Regex(regex) => { let mut core = Core::new( - &conf, + conf, &fields, RegexLineParser::new(&fields, ®ex), line_buffer, @@ -402,14 +332,17 @@ mod test { } /// Build a set of opts for testing + #[allow(clippy::too_many_arguments)] fn build_opts_generic( input_file: impl AsRef, output_file: impl AsRef, - fields: &str, - exclude: Option, + fields: Option<&str>, + header_field: Option>, + exclude: Option<&str>, no_mmap: bool, delimiter: &str, delim_is_literal: bool, + header_is_regex: bool, ) -> Opts { Opts { input: vec![input_file.as_ref().to_path_buf()], @@ -417,13 +350,13 @@ mod test { delimiter: delimiter.to_string(), delim_is_literal, output_delimiter: "\t".to_owned(), - fields: Some(fields.to_owned()), - header_field: None, - header_is_regex: true, + fields: fields.map(|f| f.to_owned()), + header_field, + header_is_regex, try_decompress: false, no_mmap, crlf: false, - exclude, + exclude: exclude.map(|e| e.to_owned()), exclude_header: None, } } @@ -465,14 +398,19 @@ mod test { unsafe { MmapChoice::auto() } }) .output_delimiter(opts.output_delimiter.as_bytes()) - .build(); + .headers(opts.header_field.as_deref()) + .fields(opts.fields.as_deref()) + .exclude(opts.exclude.as_deref()) + .exclude_headers(opts.exclude_header.as_deref()) + .header_is_regex(opts.header_is_regex) + .build() + .unwrap(); let mut line_buffer = LineBufferBuilder::new().build(); let mut writer = BufWriter::new(File::create(output).unwrap()); run( HckInput::Path(input.as_ref().to_owned()), &mut writer, - opts, - conf, + &conf, &mut line_buffer, ) .unwrap(); @@ 
-492,11 +430,13 @@ mod test { let opts = build_opts_generic( &input_file, &output_file, - "1,3", - Some(String::from("3")), + Some("1,3"), + None, + Some("3"), no_mmap, hck_delim, delim_is_literal, + false, ); let data = vec![vec!["a", "b", "c"], vec!["1", "2", "3"]]; write_file(&input_file, data, hck_delim); @@ -518,11 +458,13 @@ mod test { let opts = build_opts_generic( &input_file, &output_file, - "3-", - Some(String::from("-5")), + Some("3-"), + None, + Some("-5"), no_mmap, hck_delim, delim_is_literal, + false, ); let data = vec![ vec!["a", "b", "c", "d", "e", "f"], @@ -547,11 +489,13 @@ mod test { let opts = build_opts_generic( &input_file, &output_file, - "2-5", - Some(String::from("3-")), + Some("2-5"), + None, + Some("3-"), no_mmap, hck_delim, delim_is_literal, + false, ); let data = vec![ vec!["a", "b", "c", "d", "e", "f"], @@ -576,11 +520,13 @@ mod test { let opts = build_opts_generic( &input_file, &output_file, - "1-", - Some(String::from("3-5")), + Some("1-"), + None, + Some("3-5"), no_mmap, hck_delim, delim_is_literal, + false, ); let data = vec![ vec!["a", "b", "c", "d", "e", "f"], @@ -605,11 +551,13 @@ mod test { let opts = build_opts_generic( &input_file, &output_file, - "4,3", - Some(String::from("2-5")), + Some("4,3"), + None, + Some("2-5"), no_mmap, hck_delim, delim_is_literal, + false, ); let data = vec![ vec!["a", "b", "c", "d", "e", "f"], @@ -634,11 +582,13 @@ mod test { let opts = build_opts_generic( &input_file, &output_file, - "4-6,1-3", - Some(String::from("3-5")), + Some("4-6,1-3"), + None, + Some("3-5"), no_mmap, hck_delim, delim_is_literal, + false, ); let data = vec![ vec!["a", "b", "c", "d", "e", "f"], @@ -651,6 +601,67 @@ mod test { assert_eq!(filtered, vec![vec!["f", "a", "b"], vec!["6", "1", "2"]]); } + #[rstest] + fn test_headers_simple( + #[values(true, false)] no_mmap: bool, + #[values(r" ", " ")] hck_delim: &str, + #[values(true, false)] delim_is_literal: bool, + #[values(true, false)] header_is_regex: bool, + ) { + let tmp = 
TempDir::new().unwrap(); + let input_file = tmp.path().join("input.txt"); + let output_file = tmp.path().join("output.txt"); + let opts = build_opts_generic( + &input_file, + &output_file, + None, + Some(vec![Regex::new("a").unwrap()]), + None, + no_mmap, + hck_delim, + delim_is_literal, + header_is_regex, + ); + let data = vec![ + vec!["a", "b", "c", "d", "e", "f"], + vec!["1", "2", "3", "4", "5", "6"], + ]; + write_file(&input_file, data, hck_delim); + run_wrapper(&input_file, &output_file, &opts); + let filtered = read_tsv(output_file); + + assert_eq!(filtered, vec![vec!["a"], vec!["1"]]); + } + + #[rstest] + fn test_headers_simple2( + #[values(true, false)] no_mmap: bool, + #[values(r" ", " ")] hck_delim: &str, + #[values(true, false)] delim_is_literal: bool, + #[values(true, false)] header_is_regex: bool, + ) { + let tmp = TempDir::new().unwrap(); + let input_file = tmp.path().join("input.txt"); + let output_file = tmp.path().join("output.txt"); + let opts = build_opts_generic( + &input_file, + &output_file, + None, + Some(vec![Regex::new("a").unwrap(), Regex::new("c").unwrap()]), + None, + no_mmap, + hck_delim, + delim_is_literal, + header_is_regex, + ); + let data = vec![vec!["a", "b", "c"], vec!["1", "2", "3"]]; + write_file(&input_file, data, hck_delim); + run_wrapper(&input_file, &output_file, &opts); + let filtered = read_tsv(output_file); + + assert_eq!(filtered, vec![vec!["a", "c"], vec!["1", "3"]]); + } + #[rstest] #[rustfmt::skip::macros(vec)] fn test_read_single_values(