From aae1f711be6d34e15d10d7d8ba396b3ab326b9d5 Mon Sep 17 00:00:00 2001
From: Seth <sstadick@gmail.com>
Date: Thu, 15 Jul 2021 09:17:24 -0600
Subject: [PATCH] [bugfix] issue 26 (#27)

* Fix issue 26

* update changelog

* Clippy

* Maybe fix deb name
---
 .github/workflows/publish.yml |   2 +-
 CHANGELOG.md                  |   4 +-
 Cargo.lock                    |   8 +-
 src/lib/core.rs               | 215 +++++++++++++++++++++++++------
 src/lib/field_range.rs        |  41 ++++--
 src/main.rs                   | 235 ++++++++++++++++++----------------
 6 files changed, 337 insertions(+), 168 deletions(-)
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index d7e0a96..314817c 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -71,7 +71,7 @@ jobs:
             cargo install cargo-deb
             RUSTFLAGS="-Cllvm-args=-pgo-warn-missing-function -Cprofile-use=$(pwd)/pgo-data/merged.profdata" cargo deb
             deb_path=$(find ./target/debian/ -type f -name 'hck*')
-            asset_path="./${{ matrix.asset_name }}.deb"
+            asset_path="${{ matrix.asset_name }}.deb"
             mv "${deb_path}" "${asset_path}"
             echo "DEB=${asset_path}" >> $GITHUB_ENV
           fi
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cd854eb..6c97246 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,8 +1,10 @@
 # Changelog
 
-## v0.5.2-alpha (in progress)
+## v0.5.2
 
 - [PR24](https://github.com/sstadick/hck/pull/24) Removed the now defunct profile guided optimization shell scripts and all references to them in favor of the `justfile` that was added in `v0.5.0`
+- [Bugfix](https://github.com/sstadick/hck/issues/26) fixes incorrect handling of header line for non-stdin inputs, fixes incorrect parsing of last header fields (now strips newline before matching), fixes option parsing so that the `-F` and `-E` options won't try to consume the positional input arguments. Huge thanks to @learnbyexample for their detailed bug report.
+- Change: An error will now be raised when a specified header is not found. This differs from the convention used by the selection-by-index, which tries to match `cut`. The reasoning is that it is generally harder to type out each header field and if a header is not found you want to know about it.
 
 ## v0.5.1
 
diff --git a/Cargo.lock b/Cargo.lock
index a00b884..80ebed8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -573,9 +573,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
 
 [[package]]
 name = "structopt"
-version = "0.3.21"
+version = "0.3.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5277acd7ee46e63e5168a80734c9f6ee81b1367a7d8772a2d765df2a3705d28c"
+checksum = "69b041cdcb67226aca307e6e7be44c8806423d83e018bd662360a93dabce4d71"
 dependencies = [
  "clap",
  "lazy_static",
@@ -584,9 +584,9 @@ dependencies = [
 
 [[package]]
 name = "structopt-derive"
-version = "0.4.14"
+version = "0.4.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ba9cdfda491b814720b6b06e0cac513d922fc407582032e8706e9f137976f90"
+checksum = "7813934aecf5f51a54775e00068c237de98489463968231a51746bbbc03f9c10"
 dependencies = [
  "heck",
  "proc-macro-error",
diff --git a/src/lib/core.rs b/src/lib/core.rs
index 25beff0..3e599df 100644
--- a/src/lib/core.rs
+++ b/src/lib/core.rs
@@ -4,10 +4,16 @@
 //! lifetime coersion to reuse the `shuffler` vector really locks down the possible options.
 //!
 //! If we go with a dyn trait on the line splitter function it is appreciably slower.
-use crate::{field_range::FieldRange, line_parser::LineParser, mmap::MmapChoice};
+use crate::{
+    field_range::{FieldRange, RegexOrStr},
+    line_parser::LineParser,
+    mmap::MmapChoice,
+};
+use anyhow::Result;
 use bstr::ByteSlice;
 use grep_cli::{DecompressionMatcherBuilder, DecompressionReaderBuilder};
 use memchr;
+use regex::bytes::Regex;
 use ripline::{
     line_buffer::{LineBuffer, LineBufferReader},
     lines::{self, LineIter},
@@ -28,28 +34,8 @@ pub enum HckInput<P: AsRef<Path>> {
     Path(P),
 }
 
-impl<P: AsRef<Path>> HckInput<P> {
-    /// Read the first line of an input and return it.
-    ///
-    /// It's up to the user to make sure that any consumed bytes are properly handed
-    /// off to the line parsers later on.
-    pub fn peek_first_line(&self) -> Result<String, io::Error> {
-        let mut buffer = String::new();
-        match self {
-            HckInput::Stdin => {
-                io::stdin().read_line(&mut buffer)?;
-            }
-
-            HckInput::Path(path) => {
-                BufReader::new(File::open(path)?).read_line(&mut buffer)?;
-            }
-        }
-        Ok(buffer)
-    }
-}
-
 /// The config object for [`Core`].
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone)]
 pub struct CoreConfig<'a> {
     delimiter: &'a [u8],
     output_delimiter: &'a [u8],
@@ -57,6 +43,12 @@ pub struct CoreConfig<'a> {
     mmap_choice: MmapChoice,
     is_parser_regex: bool,
     try_decompress: bool,
+    raw_fields: Option<&'a str>,
+    raw_header_fields: Option<&'a [Regex]>,
+    raw_exclude: Option<&'a str>,
+    raw_exclude_headers: Option<&'a [Regex]>,
+    header_is_regex: bool,
+    parsed_delim: RegexOrStr<'a>,
 }
 
 impl<'a> Default for CoreConfig<'a> {
@@ -68,24 +60,128 @@ impl<'a> Default for CoreConfig<'a> {
             mmap_choice: unsafe { MmapChoice::auto() },
             is_parser_regex: false,
             try_decompress: false,
+            raw_fields: Some("1-"),
+            raw_header_fields: None,
+            raw_exclude: None,
+            raw_exclude_headers: None,
+            header_is_regex: false,
+            parsed_delim: RegexOrStr::Str(DEFAULT_DELIM.to_str().unwrap()),
         }
     }
 }
 
 impl<'a> CoreConfig<'a> {
-    #[inline]
-    pub fn is_parser_regex(&self) -> bool {
-        self.is_parser_regex
+    /// Get the parsed delimiter
+    pub fn parsed_delim(&self) -> &RegexOrStr<'a> {
+        &self.parsed_delim
     }
 
-    #[inline]
-    pub fn delimiter(&self) -> &[u8] {
-        self.delimiter
+    /// Read the first line of an input and return it.
+    ///
+    /// It's up to the user to make sure that any consumed bytes are properly handed
+    /// off to the line parsers later on.
+    pub fn peek_first_line<P: AsRef<Path>>(
+        &self,
+        input: &HckInput<P>,
+    ) -> Result<Vec<u8>, io::Error> {
+        let mut buffer = String::new();
+        match input {
+            HckInput::Stdin => {
+                io::stdin().read_line(&mut buffer)?;
+            }
+
+            HckInput::Path(path) => {
+                if self.try_decompress {
+                    let mut reader =
+                        BufReader::new(DecompressionReaderBuilder::new().build(&path)?);
+                    reader.read_line(&mut buffer)?;
+                } else {
+                    BufReader::new(File::open(path)?).read_line(&mut buffer)?;
+                }
+            }
+        }
+        Ok(lines::without_terminator(buffer.as_bytes(), self.line_terminator).to_owned())
+    }
+
+    /// Parse the raw user input fields and header fields. Returns any header bytes read and the parsed fields
+    pub fn parse_fields<P>(&self, input: &HckInput<P>) -> Result<(Option<Vec<u8>>, Vec<FieldRange>)>
+    where
+        P: AsRef<Path>,
+    {
+        // Parse the fields in the context of the files being looked at
+        let (mut extra, fields) = match (self.raw_fields, self.raw_header_fields) {
+            (Some(field_list), Some(header_fields)) => {
+                let first_line = self.peek_first_line(&input)?;
+                let mut fields = FieldRange::from_list(field_list)?;
+                let header_fields = FieldRange::from_header_list(
+                    header_fields,
+                    first_line.as_bytes(),
+                    &self.parsed_delim,
+                    self.header_is_regex,
+                )?;
+                fields.extend(header_fields.into_iter());
+                FieldRange::post_process_ranges(&mut fields);
+                (Some(first_line), fields)
+            }
+            (Some(field_list), None) => (None, FieldRange::from_list(field_list)?),
+            (None, Some(header_fields)) => {
+                let first_line = self.peek_first_line(&input)?;
+                let fields = FieldRange::from_header_list(
+                    header_fields,
+                    first_line.as_bytes(),
+                    &self.parsed_delim,
+                    self.header_is_regex,
+                )?;
+                (Some(first_line), fields)
+            }
+            (None, None) => (None, FieldRange::from_list("1-")?),
+        };
+
+        let fields = match (&self.raw_exclude, &self.raw_exclude_headers) {
+            (Some(exclude), Some(exclude_header)) => {
+                let exclude = FieldRange::from_list(exclude)?;
+                let fields = FieldRange::exclude(fields, exclude);
+                let first_line = if let Some(first_line) = extra {
+                    first_line
+                } else {
+                    self.peek_first_line(&input)?
+                };
+                let exclude_headers = FieldRange::from_header_list(
+                    &exclude_header,
+                    first_line.as_bytes(),
+                    &self.parsed_delim,
+                    self.header_is_regex,
+                )?;
+                extra = Some(first_line);
+                FieldRange::exclude(fields, exclude_headers)
+            }
+            (Some(exclude), None) => {
+                let exclude = FieldRange::from_list(exclude)?;
+                FieldRange::exclude(fields, exclude)
+            }
+            (None, Some(exclude_header)) => {
+                let first_line = if let Some(first_line) = extra {
+                    first_line
+                } else {
+                    self.peek_first_line(&input)?
+                };
+                let exclude_headers = FieldRange::from_header_list(
+                    &exclude_header,
+                    first_line.as_bytes(),
+                    &self.parsed_delim,
+                    self.header_is_regex,
+                )?;
+                extra = Some(first_line);
+                FieldRange::exclude(fields, exclude_headers)
+            }
+            (None, None) => fields,
+        };
+        Ok((extra, fields))
     }
 }
 
 /// A builder for the [`CoreConfig`] which drives [`Core`].
-#[derive(Copy, Clone, Debug)]
+#[derive(Clone, Debug)]
 pub struct CoreConfigBuilder<'a> {
     config: CoreConfig<'a>,
 }
@@ -97,45 +193,82 @@ impl<'a> CoreConfigBuilder<'a> {
         }
     }
 
-    pub fn build(self) -> CoreConfig<'a> {
-        self.config
+    pub fn build(mut self) -> Result<CoreConfig<'a>> {
+        let delim = if self.config.is_parser_regex {
+            RegexOrStr::Regex(Regex::new(self.config.delimiter.to_str()?)?)
+        } else {
+            RegexOrStr::Str(self.config.delimiter.to_str()?)
+        };
+        self.config.parsed_delim = delim;
+        Ok(self.config)
     }
 
     /// The substr to split lines on.
-    pub fn delimiter(&mut self, delim: &'a [u8]) -> &mut Self {
+    pub fn delimiter(mut self, delim: &'a [u8]) -> Self {
         self.config.delimiter = delim;
         self
     }
 
     /// The substr to use as the output delimiter
-    pub fn output_delimiter(&mut self, delim: &'a [u8]) -> &mut Self {
+    pub fn output_delimiter(mut self, delim: &'a [u8]) -> Self {
         self.config.output_delimiter = delim;
         self
     }
 
     /// The line terminator to use when looking for linebreaks and stripping linebreach chars.
-    pub fn line_terminator(&mut self, term: LineTerminator) -> &mut Self {
+    pub fn line_terminator(mut self, term: LineTerminator) -> Self {
         self.config.line_terminator = term;
         self
     }
 
     /// Whether or not to try to use mmap mode
-    pub fn mmap(&mut self, mmap_choice: MmapChoice) -> &mut Self {
+    pub fn mmap(mut self, mmap_choice: MmapChoice) -> Self {
         self.config.mmap_choice = mmap_choice;
         self
     }
 
     /// Whether or not the parser is a regex
-    pub fn is_regex_parser(&mut self, is_regex: bool) -> &mut Self {
+    #[allow(clippy::wrong_self_convention)]
+    pub fn is_regex_parser(mut self, is_regex: bool) -> Self {
         self.config.is_parser_regex = is_regex;
         self
     }
 
     /// Try to decompress an input file
-    pub fn try_decompress(&mut self, try_decompress: bool) -> &mut Self {
+    pub fn try_decompress(mut self, try_decompress: bool) -> Self {
         self.config.try_decompress = try_decompress;
         self
     }
+
+    /// The raw user input fields to output
+    pub fn fields(mut self, fields: Option<&'a str>) -> Self {
+        self.config.raw_fields = fields;
+        self
+    }
+
+    /// The raw user input header to output
+    pub fn headers(mut self, headers: Option<&'a [Regex]>) -> Self {
+        self.config.raw_header_fields = headers;
+        self
+    }
+
+    /// The raw user input fields to exclude
+    pub fn exclude(mut self, exclude: Option<&'a str>) -> Self {
+        self.config.raw_exclude = exclude;
+        self
+    }
+
+    /// The raw user input headers to exclude
+    pub fn exclude_headers(mut self, exclude_headers: Option<&'a [Regex]>) -> Self {
+        self.config.raw_exclude_headers = exclude_headers;
+        self
+    }
+
+    /// Whether or not to treat the headers as regex
+    pub fn header_is_regex(mut self, header_is_regex: bool) -> Self {
+        self.config.header_is_regex = header_is_regex;
+        self
+    }
 }
 
 impl<'a> Default for CoreConfigBuilder<'a> {
@@ -203,18 +336,18 @@ where
         &mut self,
         input: HckInput<P>,
         mut output: W,
-        header: Option<String>,
+        header: Option<Vec<u8>>,
     ) -> Result<(), io::Error>
     where
         P: AsRef<Path>,
         W: Write,
     {
-        if let Some(header) = header {
-            self.hck_bytes(header.as_bytes(), &mut output)?;
-        }
         // Dispatch to a given `hck_*` runner depending on configuration
         match input {
             HckInput::Stdin => {
+                if let Some(header) = header {
+                    self.hck_bytes(header.as_bytes(), &mut output)?;
+                }
                 let reader = io::stdin();
                 if self.allow_fastmode() {
                     self.hck_reader_fast(reader, &mut output)
diff --git a/src/lib/field_range.rs b/src/lib/field_range.rs
index 3f74719..d3c8284 100644
--- a/src/lib/field_range.rs
+++ b/src/lib/field_range.rs
@@ -13,8 +13,10 @@ use thiserror::Error;
 const MAX: usize = usize::MAX;
 
 /// Errors for parsing / validating [`FieldRange`] strings.
-#[derive(Error, Debug)]
+#[derive(Error, Debug, PartialEq)]
 pub enum FieldError {
+    #[error("Header not found: {0}")]
+    HeaderNotFound(String),
     #[error("Fields and positions are numbered from 1: {0}")]
     InvalidField(usize),
     #[error("High end of range less than low end of range: {0}-{1}")]
@@ -25,7 +27,7 @@ pub enum FieldError {
     NoHeadersMatched,
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub enum RegexOrStr<'b> {
     Regex(Regex),
     Str(&'b str),
@@ -153,10 +155,12 @@ impl FieldRange {
         header_is_regex: bool,
     ) -> Result<Vec<FieldRange>, FieldError> {
         let mut ranges = vec![];
+        let mut found = vec![false; list.len()];
         for (i, header) in delim.split(header).enumerate() {
             for (j, regex) in list.iter().enumerate() {
                 if !header_is_regex {
                     if regex.as_str().as_bytes() == header {
+                        found[j] = true;
                         ranges.push(FieldRange {
                             low: i,
                             high: i,
@@ -164,6 +168,7 @@ impl FieldRange {
                         });
                     }
                 } else if regex.is_match(header) {
+                    found[j] = true;
                     ranges.push(FieldRange {
                         low: i,
                         high: i,
@@ -176,6 +181,11 @@ impl FieldRange {
         if ranges.is_empty() {
             return Err(FieldError::NoHeadersMatched);
         }
+        for (i, was_found) in found.into_iter().enumerate() {
+            if !was_found {
+                return Err(FieldError::HeaderNotFound(list[i].as_str().to_owned()));
+            }
+        }
 
         FieldRange::post_process_ranges(&mut ranges);
 
@@ -364,23 +374,36 @@ mod test {
         let header = b"is_cat-is-isdog-wascow-was_is_apple-12345-!$%*(_)";
         let delim = Regex::new("-").unwrap();
         let delim = RegexOrStr::Regex(delim);
-        let header_fields = vec![
-            Regex::new(r"^is_.*$").unwrap(),
-            Regex::new("dog").unwrap(),
-            Regex::new(r"\$%").unwrap(),
-            Regex::new(r"is").unwrap(),
-        ];
+        let header_fields = vec![Regex::new(r"is").unwrap()];
         let fields = FieldRange::from_header_list(&header_fields, header, &delim, false).unwrap();
         assert_eq!(
             vec![FieldRange {
                 low: 1,
                 high: 1,
-                pos: 3
+                pos: 0
             },],
             fields
         );
     }
 
+    #[test]
+    fn test_parse_header_fields_literal_header_not_found() {
+        let header = b"is_cat-is-isdog-wascow-was_is_apple-12345-!$%*(_)";
+        let delim = Regex::new("-").unwrap();
+        let delim = RegexOrStr::Regex(delim);
+        let header_fields = vec![
+            Regex::new(r"^is_.*$").unwrap(),
+            Regex::new("dog").unwrap(),
+            Regex::new(r"\$%").unwrap(),
+            Regex::new(r"is").unwrap(),
+        ];
+        let result = FieldRange::from_header_list(&header_fields, header, &delim, false);
+        assert_eq!(
+            result.unwrap_err(),
+            FieldError::HeaderNotFound(String::from(r"^is_.*$"))
+        );
+    }
+
     #[test]
     #[rustfmt::skip::macros(assert_eq)]
     fn test_exclude_simple() {
diff --git a/src/main.rs b/src/main.rs
index 8aa5199..1df2a0e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,10 +1,9 @@
 use anyhow::{Context, Error, Result};
-use bstr::ByteSlice;
 use env_logger::Env;
 use grep_cli::stdout;
 use hcklib::{
     core::{Core, CoreConfig, CoreConfigBuilder, HckInput},
-    field_range::{FieldRange, RegexOrStr},
+    field_range::RegexOrStr,
     line_parser::{RegexLineParser, SubStrLineParser},
     mmap::MmapChoice,
 };
@@ -146,12 +145,12 @@ struct Opts {
 
     /// Headers to exclude from the output, ex: '^badfield.*$`. This is a string literal by default.
     /// Add the `-r` flag to treat as a regex.
-    #[structopt(short = "E", long)]
+    #[structopt(short = "E", long, multiple = true, number_of_values = 1)]
     exclude_header: Option<Vec<Regex>>,
 
     /// A string literal or regex to select headers, ex: '^is_.*$`. This is a string literal
     /// by default. add the `-r` flag to treat it as a regex.
-    #[structopt(short = "F", long)]
+    #[structopt(short = "F", long, multiple = true, number_of_values = 1)]
     header_field: Option<Vec<Regex>>,
 
     /// Treat the header_fields as regexs instead of string literals
@@ -200,24 +199,30 @@ fn main() -> Result<()> {
     } else {
         LineTerminator::default()
     };
-    conf_builder.line_terminator(line_term);
+    conf_builder = conf_builder.line_terminator(line_term);
 
     let mmap = if opts.no_mmap {
         MmapChoice::never()
     } else {
         unsafe { MmapChoice::auto() }
     };
-    conf_builder.mmap(mmap);
-    conf_builder.delimiter(&opts.delimiter.as_bytes());
-    conf_builder.output_delimiter(&opts.output_delimiter.as_bytes());
-    conf_builder.is_regex_parser(!opts.delim_is_literal);
-    conf_builder.try_decompress(opts.try_decompress);
-    let conf = conf_builder.build();
+    let conf = conf_builder
+        .mmap(mmap)
+        .delimiter(&opts.delimiter.as_bytes())
+        .output_delimiter(&opts.output_delimiter.as_bytes())
+        .is_regex_parser(!opts.delim_is_literal)
+        .try_decompress(opts.try_decompress)
+        .fields(opts.fields.as_deref())
+        .headers(opts.header_field.as_deref())
+        .exclude(opts.exclude.as_deref())
+        .exclude_headers(opts.exclude_header.as_deref())
+        .header_is_regex(opts.header_is_regex)
+        .build()?;
 
     let mut line_buffer = LineBufferBuilder::new().build();
 
     for input in inputs.into_iter() {
-        if let Err(err) = run(input, &mut writer, &opts, conf, &mut line_buffer) {
+        if let Err(err) = run(input, &mut writer, &conf, &mut line_buffer) {
             if is_broken_pipe(&err) {
                 exit(0)
             }
@@ -232,96 +237,21 @@ fn main() -> Result<()> {
 fn run<W: Write>(
     input: HckInput<PathBuf>,
     writer: &mut W,
-    opts: &Opts,
-    conf: CoreConfig,
+    conf: &CoreConfig,
     line_buffer: &mut LineBuffer,
 ) -> Result<()> {
     let writer = BufWriter::new(writer);
 
-    let delim = if conf.is_parser_regex() {
-        RegexOrStr::Regex(Regex::new(conf.delimiter().to_str()?)?)
-    } else {
-        RegexOrStr::Str(conf.delimiter().to_str()?)
-    };
-
-    // Parser the fields in the context of the files being looked at
-    let (mut extra, fields) = match (&opts.fields, &opts.header_field) {
-        (Some(field_list), Some(header_fields)) => {
-            let first_line = input.peek_first_line()?;
-            let mut fields = FieldRange::from_list(field_list)?;
-            let header_fields = FieldRange::from_header_list(
-                header_fields,
-                first_line.as_bytes(),
-                &delim,
-                opts.header_is_regex,
-            )?;
-            fields.extend(header_fields.into_iter());
-            FieldRange::post_process_ranges(&mut fields);
-            (Some(first_line), fields)
-        }
-        (Some(field_list), None) => (None, FieldRange::from_list(field_list)?),
-        (None, Some(header_fields)) => {
-            let first_line = input.peek_first_line()?;
-            let fields = FieldRange::from_header_list(
-                header_fields,
-                first_line.as_bytes(),
-                &delim,
-                opts.header_is_regex,
-            )?;
-            (Some(first_line), fields)
-        }
-        (None, None) => (None, FieldRange::from_list("1-")?),
-    };
-
-    let fields = match (&opts.exclude, &opts.exclude_header) {
-        (Some(exclude), Some(exclude_header)) => {
-            let exclude = FieldRange::from_list(exclude)?;
-            let fields = FieldRange::exclude(fields, exclude);
-            let first_line = if let Some(first_line) = extra {
-                first_line
-            } else {
-                input.peek_first_line()?
-            };
-            let exclude_headers = FieldRange::from_header_list(
-                &exclude_header,
-                first_line.as_bytes(),
-                &delim,
-                opts.header_is_regex,
-            )?;
-            extra = Some(first_line);
-            FieldRange::exclude(fields, exclude_headers)
-        }
-        (Some(exclude), None) => {
-            let exclude = FieldRange::from_list(exclude)?;
-            FieldRange::exclude(fields, exclude)
-        }
-        (None, Some(exclude_header)) => {
-            let first_line = if let Some(first_line) = extra {
-                first_line
-            } else {
-                input.peek_first_line()?
-            };
-            let exclude_headers = FieldRange::from_header_list(
-                &exclude_header,
-                first_line.as_bytes(),
-                &delim,
-                opts.header_is_regex,
-            )?;
-            extra = Some(first_line);
-            FieldRange::exclude(fields, exclude_headers)
-        }
-        (None, None) => fields,
-    };
-
+    let (extra, fields) = conf.parse_fields(&input)?;
     // No point processing empty fields
     if fields.is_empty() {
         return Ok(());
     }
 
-    match &delim {
+    match conf.parsed_delim() {
         RegexOrStr::Regex(regex) => {
             let mut core = Core::new(
-                &conf,
+                conf,
                 &fields,
                 RegexLineParser::new(&fields, &regex),
                 line_buffer,
@@ -402,14 +332,17 @@ mod test {
     }
 
     /// Build a set of opts for testing
+    #[allow(clippy::too_many_arguments)]
     fn build_opts_generic(
         input_file: impl AsRef<Path>,
         output_file: impl AsRef<Path>,
-        fields: &str,
-        exclude: Option<String>,
+        fields: Option<&str>,
+        header_field: Option<Vec<Regex>>,
+        exclude: Option<&str>,
         no_mmap: bool,
         delimiter: &str,
         delim_is_literal: bool,
+        header_is_regex: bool,
     ) -> Opts {
         Opts {
             input: vec![input_file.as_ref().to_path_buf()],
@@ -417,13 +350,13 @@ mod test {
             delimiter: delimiter.to_string(),
             delim_is_literal,
             output_delimiter: "\t".to_owned(),
-            fields: Some(fields.to_owned()),
-            header_field: None,
-            header_is_regex: true,
+            fields: fields.map(|f| f.to_owned()),
+            header_field,
+            header_is_regex,
             try_decompress: false,
             no_mmap,
             crlf: false,
-            exclude,
+            exclude: exclude.map(|e| e.to_owned()),
             exclude_header: None,
         }
     }
@@ -465,14 +398,19 @@ mod test {
                 unsafe { MmapChoice::auto() }
             })
             .output_delimiter(opts.output_delimiter.as_bytes())
-            .build();
+            .headers(opts.header_field.as_deref())
+            .fields(opts.fields.as_deref())
+            .exclude(opts.exclude.as_deref())
+            .exclude_headers(opts.exclude_header.as_deref())
+            .header_is_regex(opts.header_is_regex)
+            .build()
+            .unwrap();
         let mut line_buffer = LineBufferBuilder::new().build();
         let mut writer = BufWriter::new(File::create(output).unwrap());
         run(
             HckInput::Path(input.as_ref().to_owned()),
             &mut writer,
-            opts,
-            conf,
+            &conf,
             &mut line_buffer,
         )
         .unwrap();
@@ -492,11 +430,13 @@ mod test {
         let opts = build_opts_generic(
             &input_file,
             &output_file,
-            "1,3",
-            Some(String::from("3")),
+            Some("1,3"),
+            None,
+            Some("3"),
             no_mmap,
             hck_delim,
             delim_is_literal,
+            false,
         );
         let data = vec![vec!["a", "b", "c"], vec!["1", "2", "3"]];
         write_file(&input_file, data, hck_delim);
@@ -518,11 +458,13 @@ mod test {
         let opts = build_opts_generic(
             &input_file,
             &output_file,
-            "3-",
-            Some(String::from("-5")),
+            Some("3-"),
+            None,
+            Some("-5"),
             no_mmap,
             hck_delim,
             delim_is_literal,
+            false,
         );
         let data = vec![
             vec!["a", "b", "c", "d", "e", "f"],
@@ -547,11 +489,13 @@ mod test {
         let opts = build_opts_generic(
             &input_file,
             &output_file,
-            "2-5",
-            Some(String::from("3-")),
+            Some("2-5"),
+            None,
+            Some("3-"),
             no_mmap,
             hck_delim,
             delim_is_literal,
+            false,
         );
         let data = vec![
             vec!["a", "b", "c", "d", "e", "f"],
@@ -576,11 +520,13 @@ mod test {
         let opts = build_opts_generic(
             &input_file,
             &output_file,
-            "1-",
-            Some(String::from("3-5")),
+            Some("1-"),
+            None,
+            Some("3-5"),
             no_mmap,
             hck_delim,
             delim_is_literal,
+            false,
         );
         let data = vec![
             vec!["a", "b", "c", "d", "e", "f"],
@@ -605,11 +551,13 @@ mod test {
         let opts = build_opts_generic(
             &input_file,
             &output_file,
-            "4,3",
-            Some(String::from("2-5")),
+            Some("4,3"),
+            None,
+            Some("2-5"),
             no_mmap,
             hck_delim,
             delim_is_literal,
+            false,
         );
         let data = vec![
             vec!["a", "b", "c", "d", "e", "f"],
@@ -634,11 +582,13 @@ mod test {
         let opts = build_opts_generic(
             &input_file,
             &output_file,
-            "4-6,1-3",
-            Some(String::from("3-5")),
+            Some("4-6,1-3"),
+            None,
+            Some("3-5"),
             no_mmap,
             hck_delim,
             delim_is_literal,
+            false,
         );
         let data = vec![
             vec!["a", "b", "c", "d", "e", "f"],
@@ -651,6 +601,67 @@ mod test {
         assert_eq!(filtered, vec![vec!["f", "a", "b"], vec!["6", "1", "2"]]);
     }
 
+    #[rstest]
+    fn test_headers_simple(
+        #[values(true, false)] no_mmap: bool,
+        #[values(r" ", "  ")] hck_delim: &str,
+        #[values(true, false)] delim_is_literal: bool,
+        #[values(true, false)] header_is_regex: bool,
+    ) {
+        let tmp = TempDir::new().unwrap();
+        let input_file = tmp.path().join("input.txt");
+        let output_file = tmp.path().join("output.txt");
+        let opts = build_opts_generic(
+            &input_file,
+            &output_file,
+            None,
+            Some(vec![Regex::new("a").unwrap()]),
+            None,
+            no_mmap,
+            hck_delim,
+            delim_is_literal,
+            header_is_regex,
+        );
+        let data = vec![
+            vec!["a", "b", "c", "d", "e", "f"],
+            vec!["1", "2", "3", "4", "5", "6"],
+        ];
+        write_file(&input_file, data, hck_delim);
+        run_wrapper(&input_file, &output_file, &opts);
+        let filtered = read_tsv(output_file);
+
+        assert_eq!(filtered, vec![vec!["a"], vec!["1"]]);
+    }
+
+    #[rstest]
+    fn test_headers_simple2(
+        #[values(true, false)] no_mmap: bool,
+        #[values(r" ", "  ")] hck_delim: &str,
+        #[values(true, false)] delim_is_literal: bool,
+        #[values(true, false)] header_is_regex: bool,
+    ) {
+        let tmp = TempDir::new().unwrap();
+        let input_file = tmp.path().join("input.txt");
+        let output_file = tmp.path().join("output.txt");
+        let opts = build_opts_generic(
+            &input_file,
+            &output_file,
+            None,
+            Some(vec![Regex::new("a").unwrap(), Regex::new("c").unwrap()]),
+            None,
+            no_mmap,
+            hck_delim,
+            delim_is_literal,
+            header_is_regex,
+        );
+        let data = vec![vec!["a", "b", "c"], vec!["1", "2", "3"]];
+        write_file(&input_file, data, hck_delim);
+        run_wrapper(&input_file, &output_file, &opts);
+        let filtered = read_tsv(output_file);
+
+        assert_eq!(filtered, vec![vec!["a", "c"], vec!["1", "3"]]);
+    }
+
     #[rstest]
     #[rustfmt::skip::macros(vec)]
     fn test_read_single_values(