From 1c54ae47d86971e4dca0820613654fde22af17a8 Mon Sep 17 00:00:00 2001 From: jeff-k Date: Sun, 12 May 2024 18:06:38 +0100 Subject: [PATCH] update readme --- .github/workflows/rust.yml | 22 +++++++++++++ Cargo.toml | 4 +-- README.md | 63 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 85 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/rust.yml diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000..31000a2 --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,22 @@ +name: Rust + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Build + run: cargo build --verbose + - name: Run tests + run: cargo test --verbose diff --git a/Cargo.toml b/Cargo.toml index d550015..753b17a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,12 +6,12 @@ edition = "2021" [dependencies] futures = "0.3" futures-test = "0.3" -bio-seq = { path="../bio-seq/bio-seq" } +bio-seq = "0.12" [dev-dependencies] flate2 = "1" clap = { version="4", features=["derive"] } -bio-seq = { path="../bio-seq/bio-seq" } +bio-seq = "0.12" [[example]] name = "fqcheck" diff --git a/README.md b/README.md index 9f8cb50..5a6dcfd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,65 @@ -# bio-streams +[![Docs.rs](https://docs.rs/bio-streams/badge.svg)](https://docs.rs/bio-streams) +[![CI status](https://github.com/jeff-k/bio-streams/actions/workflows/rust.yml/badge.svg)](https://github.com/jeff-k/bio-streams/actions/workflows/rust.yml) -## examples +
+ +# bio-streams + +### Types and data structures for streaming genomics data + +#### This crate is in early development. Contributions are very welcome. + +WebAssembly examples: [Remove non M. TB reads from streaming fastqs](https://jeff-k.github.io/fqdemo/), [amplicon-based SARS-CoV-2 assembly](https://jeff-k.github.io/amplicon-tiling/) +
+ +## Features + +`Record` type shared by `Fastq` and `Fasta` streams: + +```rust +pub struct Record<T: for<'a> TryFrom<&'a [u8]> = Vec<u8>> { + pub fields: Vec<u8>, + pub seq: T, + pub quality: Option<Vec<u8>>, // fasta records set quality to `None` +} +``` + +Records can be read into custom types: `pub struct Fastq<R: BufRead, T = Seq<Dna>>` + +## Examples + +```rust +// Open a pair of gzipped fastq files as streams of `Record`s with `Seq<Dna>` sequences + +let fq1: Fastq<BufReader<MultiGzDecoder<File>>> = Fastq::new(BufReader::new( + MultiGzDecoder::new(File::open(&file1).unwrap()), +)); + +let fq2: Fastq<BufReader<MultiGzDecoder<File>>> = Fastq::new(BufReader::new( + MultiGzDecoder::new(File::open(&file2).unwrap()), +)); + +for zipped in fq1.zip(fq2) { + match zipped { + (Ok(r1), Ok(r2)) => { + // check that the last characters of the name strings are 1 and 2 + if r1.fields[r1.fields.len() - 1] != b'1' || r2.fields[r2.fields.len() - 1] != b'2' + { + eprintln!("paired records do not end in 1/2"); + } + + // check that the description fields are equal up to the last character + if r1.fields[..r1.fields.len() - 1] != r2.fields[..r2.fields.len() - 1] { + eprintln!("reads do not have the same names"); + exit(1); + } + } + _ => { + eprintln!("Parse error in fastq files"); + } + } +} +``` To run the `fqcheck` example program with read files `r1.fq.gz` and `f2.fq.gz`: