diff --git a/CHANGELOG.md b/CHANGELOG.md index f44d590..54ff221 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [v0.6.2] +### Fixed +- The de-novo QC stage failing when not a single input read re-aligns against the draft consensus. + ## [v0.6.1] ### Changed - More informative warnings for failed samples in the report intro section. diff --git a/bin/workflow_glue/trim_and_qc.py b/bin/workflow_glue/trim_and_qc.py index 1abd580..9010e58 100755 --- a/bin/workflow_glue/trim_and_qc.py +++ b/bin/workflow_glue/trim_and_qc.py @@ -20,10 +20,24 @@ def main(args): args.depth, sep="\t", header=None, names=["ref", "start", "end", "depth"] ) - # calculate the mean depths of the candidate consensus contigs - mean_depths = depths.groupby("ref").apply( - lambda df: df.eval("(end - start) * depth").sum() / df["end"].iloc[-1] - ) + with pysam.FastxFile(args.consensus) as f: + fastq = {entry.name: entry for entry in f} + + # make sure we got the same seq IDs in flagstat and the FASTQ file + seq_ids = [x for x in flagstat.index if x != "*"] + if set(fastq.keys()) != set(seq_ids): + ValueError("Sequence IDs in FASTQ and in flagstat file don't agree.") + + # if all reads in the BAM are unmapped, `mosdepth` annoyingly outputs an empty + # per-base depth file; we need to account for that + if depths.empty: + mean_depths = pd.Series(0, index=seq_ids) + else: + # we got a valid depth file; calculate the mean depths of the candidate + # consensus contigs + mean_depths = depths.groupby("ref").apply( + lambda df: df.eval("(end - start) * depth").sum() / df["end"].iloc[-1] + ) # perform QC checks on each contig qc_stats = pd.DataFrame( @@ -42,7 +56,7 @@ def main(args): qc_stats.index.name = "contig" for contig, mean_depth in mean_depths.items(): status = "passed" - contig_len = depths.query("ref == @contig")["end"].iloc[-1] + contig_len = len(fastq[contig].sequence) fail_reasons = [] if mean_depth < args.minimum_depth: status = "failed" diff --git a/nextflow.config b/nextflow.config index ba1e1fe..3371657 100644 --- a/nextflow.config +++ b/nextflow.config @@ -76,7 +76,7 @@ manifest { description = 'Amplicon workflow' mainScript = 'main.nf' nextflowVersion = '>=23.04.2' - version = 'v0.6.1' + version = 'v0.6.2' } epi2melabs {