Skip to content

Commit

Permalink
Merge pull request #1424 from nextstrain/fix/fasta-empty-lines
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov authored Mar 5, 2024
2 parents ef481fc + da0eb50 commit 2745b47
Show file tree
Hide file tree
Showing 2 changed files with 372 additions and 11 deletions.
110 changes: 103 additions & 7 deletions packages/nextclade/src/io/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ where

/// Concatenate readers into a single reader, alternating them with the provided delimiter,
/// i.e. inserting this sequence of bytes between adjacent readers.
///
/// The idea is that if readers (e.g. files) don't contain trailing newline, it will make line parsers behave
/// incorrectly on reader boundaries. So one can add newline delimiter between readers, to compensate for that.
///
/// TODO: It might be better to extract the functionality of appending a trailing newline to another iterator class,
/// and then wrap each reader individually, instead of doing it conditionally in the concatenation iterator like it
/// is done currently.
///
/// TODO: Ideally, if a reader contains trailing newline(s), we'd like to avoid appending another newline.
pub fn with_delimiter(mut iter: I, delimiter: Option<Vec<u8>>) -> Concat<I> {
let curr = iter.next();
Concat { iter, curr, delimiter }
Expand All @@ -47,7 +56,7 @@ where
/// Returns a reference to the item last read, or None if the iterator has been exhausted.
///
/// This is useful for error handling and reporting: if a read operation fails, the reference
/// returned will point to the item which caused the the error.
/// returned will point to the item which caused the error.
#[inline]
pub const fn current(&self) -> Option<&<I as Iterator>::Item> {
self.curr.as_ref()
Expand Down Expand Up @@ -88,17 +97,68 @@ where

#[cfg(test)]
mod tests {
#![allow(clippy::iter_on_single_items, clippy::redundant_type_annotations)]
use super::*;
use pretty_assertions::assert_eq;
use rstest::rstest;

#[allow(clippy::redundant_type_annotations)]
#[rstest]
fn concatenates_readers_with_delimiter() {
// The idea is that readers (e.g. files) don't contain newline at the end, making line parsing incorrect on reader
// boundaries.
// It is expected that the newline delimiter will be inserted between readers, to compensate for that.
fn test_concatenate_with_delimiter_both_with_trailing_newline() {
let r1: &[u8] = b"First\nreader\n";
let r2: &[u8] = b"Second\nreader\n";

let mut concat = Concat::with_delimiter([r1, r2].into_iter(), Some(b"\n".to_vec()));
let mut result = String::new();
concat.read_to_string(&mut result).unwrap();

assert_eq!(result, "First\nreader\n\nSecond\nreader\n\n");
}

#[rstest]
fn test_concatenate_with_delimiter_one_with_trailing_newline() {
let r1: &[u8] = b"First\nreader\n";

let mut concat = Concat::with_delimiter([r1].into_iter(), Some(b"\n".to_vec()));
let mut result = String::new();
concat.read_to_string(&mut result).unwrap();

assert_eq!(result, "First\nreader\n\n");
}

#[rstest]
fn test_concatenate_with_delimiter_one_without_trailing_newline() {
let r1: &[u8] = b"First\nreader\nwithout\ntrailing\nnewline";

let mut concat = Concat::with_delimiter([r1].into_iter(), Some(b"\n".to_vec()));
let mut result = String::new();
concat.read_to_string(&mut result).unwrap();

assert_eq!(result, "First\nreader\nwithout\ntrailing\nnewline\n");
}

#[rstest]
fn test_concatenate_with_delimiter_one_empty() {
let r1: &[u8] = b"";

let mut concat = Concat::with_delimiter([r1].into_iter(), Some(b"\n".to_vec()));
let mut result = String::new();
concat.read_to_string(&mut result).unwrap();

assert_eq!(result, "\n");
}

#[rstest]
fn test_concatenate_with_delimiter_one_empty_with_trailing_newline() {
let r1: &[u8] = b"\n";

let mut concat = Concat::with_delimiter([r1].into_iter(), Some(b"\n".to_vec()));
let mut result = String::new();
concat.read_to_string(&mut result).unwrap();

assert_eq!(result, "\n\n");
}

#[rstest]
fn test_concatenate_with_delimiter_no_newline() {
let r1: &[u8] = b"No\ntrailing\nnewline";
let r2: &[u8] = b"And\nneither\nhere";

Expand All @@ -108,4 +168,40 @@ mod tests {

assert_eq!(result, "No\ntrailing\nnewline\nAnd\nneither\nhere\n");
}

#[rstest]
fn test_concatenate_with_delimiter_first_empty_no_newline() {
let r1: &[u8] = b"";
let r2: &[u8] = b"Second";

let mut concat = Concat::with_delimiter([r1, r2].into_iter(), Some(b"\n".to_vec()));
let mut result = String::new();
concat.read_to_string(&mut result).unwrap();

assert_eq!(result, "\nSecond\n");
}

#[rstest]
fn test_concatenate_with_delimiter_second_empty_no_newline() {
let r1: &[u8] = b"First";
let r2: &[u8] = b"";

let mut concat = Concat::with_delimiter([r1, r2].into_iter(), Some(b"\n".to_vec()));
let mut result = String::new();
concat.read_to_string(&mut result).unwrap();

assert_eq!(result, "First\n\n");
}

#[rstest]
fn test_concatenate_with_delimiter_both_empty() {
let r1: &[u8] = b"";
let r2: &[u8] = b"";

let mut concat = Concat::with_delimiter([r1, r2].into_iter(), Some(b"\n".to_vec()));
let mut result = String::new();
concat.read_to_string(&mut result).unwrap();

assert_eq!(result, "\n\n");
}
}
Loading

0 comments on commit 2745b47

Please sign in to comment.