diff --git a/CHANGELOG.md b/CHANGELOG.md index dde87957..9ba743f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,7 +36,8 @@ - `salmon/salmon_quant`: Transcript quantification from RNA-seq data (PR #24). * `samtools`: - - `samtools/index`: Index SAM/BAM/CRAM files. + - `samtools/flagstat`: Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type (PR #31). + - `samtools/index`: Index SAM/BAM/CRAM files (PR #33). ## MAJOR CHANGES diff --git a/src/samtools/samtools_flagstat/config.vsh.yaml b/src/samtools/samtools_flagstat/config.vsh.yaml new file mode 100644 index 00000000..20a5e3cd --- /dev/null +++ b/src/samtools/samtools_flagstat/config.vsh.yaml @@ -0,0 +1,51 @@ +name: samtools_flagstat +namespace: samtools +description: Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type. +keywords: [ stats, mapping, counts, bam, sam, cram ] +links: + homepage: https://www.htslib.org/ + documentation: https://www.htslib.org/doc/samtools-flagstat.html + repository: https://github.com/samtools/samtools +references: + doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] +license: MIT/Expat + +argument_groups: + - name: Inputs + arguments: + - name: --bam + type: file + description: | + BAM input files. + - name: --bai + type: file + description: | + BAM index file. + - name: Outputs + arguments: + - name: --output + type: file + description: | + File containing samtools stats output. + direction: output + example: output.flagstat + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1 + setup: + - type: docker + run: | + samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \ + sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt +runners: +- type: executable +- type: nextflow \ No newline at end of file diff --git a/src/samtools/samtools_flagstat/help.txt b/src/samtools/samtools_flagstat/help.txt new file mode 100644 index 00000000..fe54d20c --- /dev/null +++ b/src/samtools/samtools_flagstat/help.txt @@ -0,0 +1,13 @@ +```sh +samtools flagstat --help +``` +Usage: samtools flagstat [options] + --input-fmt-option OPT[=VAL] + Specify a single input file format option in the form + of OPTION or OPTION=VALUE + -@, --threads INT + Number of additional threads to use [0] + --verbosity INT + Set level of verbosity + -O, --output-fmt FORMAT[,OPT[=VAL]]... + Specify output format (json, tsv) \ No newline at end of file diff --git a/src/samtools/samtools_flagstat/script.sh b/src/samtools/samtools_flagstat/script.sh new file mode 100644 index 00000000..beac3703 --- /dev/null +++ b/src/samtools/samtools_flagstat/script.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +set -e + +samtools flagstat \ + "$par_bam" \ + > "$par_output" + \ No newline at end of file diff --git a/src/samtools/samtools_flagstat/test.sh b/src/samtools/samtools_flagstat/test.sh new file mode 100644 index 00000000..ec5a5b07 --- /dev/null +++ b/src/samtools/samtools_flagstat/test.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +test_dir="${meta_resources_dir}test_data" +echo ">>> Testing $meta_functionality_name" + +"$meta_executable" \ + --bam "$test_dir/a.bam" \ + --bai "$test_dir/a.bam.bai" \ + --output "$test_dir/a.flagstat" + +echo ">>> Checking whether output exists" +[ ! -f "$test_dir/a.flagstat" ] && echo "File 'a.flagstat' does not exist!" && exit 1 + +echo ">>> Checking whether output is non-empty" +[ ! -s "$test_dir/a.flagstat" ] && echo "File 'a.flagstat' is empty!" && exit 1 + +echo ">>> Checking whether output is correct" +diff "$test_dir/a.flagstat" "$test_dir/a_ref.flagstat" || \ + (echo "Output file a.flagstat does not match expected output" && exit 1) + +rm "$test_dir/a.flagstat" + +############################################################################################ + +echo ">>> Testing $meta_functionality_name with singletons in the input" + +"$meta_executable" \ + --bam "$test_dir/test.paired_end.sorted.bam" \ + --bai "$test_dir/test.paired_end.sorted.bam.bai" \ + --output "$test_dir/test.paired_end.sorted.flagstat" + +echo ">>> Checking whether output exists" +[ ! -f "$test_dir/test.paired_end.sorted.flagstat" ] && echo "File 'test.paired_end.sorted.flagstat' does not exist!" && exit 1 + +echo ">>> Checking whether output is non-empty" +[ ! -s "$test_dir/test.paired_end.sorted.flagstat" ] && echo "File 'test.paired_end.sorted.flagstat' is empty!" && exit 1 + +echo ">>> Checking whether output is correct" +diff "$test_dir/test.paired_end.sorted.flagstat" "$test_dir/test_ref.paired_end.sorted.flagstat" || \ + (echo "Output file test.paired_end.sorted.flagstat does not match expected output" && exit 1) + +rm "$test_dir/test.paired_end.sorted.flagstat" + + + +echo "All tests succeeded!" +exit 0 \ No newline at end of file diff --git a/src/samtools/samtools_flagstat/test_data/a.bam b/src/samtools/samtools_flagstat/test_data/a.bam new file mode 100644 index 00000000..dba1268a Binary files /dev/null and b/src/samtools/samtools_flagstat/test_data/a.bam differ diff --git a/src/samtools/samtools_flagstat/test_data/a.bam.bai b/src/samtools/samtools_flagstat/test_data/a.bam.bai new file mode 100644 index 00000000..12f5f510 Binary files /dev/null and b/src/samtools/samtools_flagstat/test_data/a.bam.bai differ diff --git a/src/samtools/samtools_flagstat/test_data/a_ref.flagstat b/src/samtools/samtools_flagstat/test_data/a_ref.flagstat new file mode 100644 index 00000000..5d8b9a73 --- /dev/null +++ b/src/samtools/samtools_flagstat/test_data/a_ref.flagstat @@ -0,0 +1,16 @@ +6 + 0 in total (QC-passed reads + QC-failed reads) +6 + 0 primary +0 + 0 secondary +0 + 0 supplementary +0 + 0 duplicates +0 + 0 primary duplicates +6 + 0 mapped (100.00% : N/A) +6 + 0 primary mapped (100.00% : N/A) +6 + 0 paired in sequencing +3 + 0 read1 +3 + 0 read2 +6 + 0 properly paired (100.00% : N/A) +6 + 0 with itself and mate mapped +0 + 0 singletons (0.00% : N/A) +0 + 0 with mate mapped to a different chr +0 + 0 with mate mapped to a different chr (mapQ>=5) diff --git a/src/samtools/samtools_flagstat/test_data/script.sh b/src/samtools/samtools_flagstat/test_data/script.sh new file mode 100755 index 00000000..fc32b48e --- /dev/null +++ b/src/samtools/samtools_flagstat/test_data/script.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Download test data from snakemake wrapper + +wget https://raw.githubusercontent.com/snakemake/snakemake-wrappers/3a4f7004281efc176fd9af732ad88d00c47d432d/bio/samtools/flagstat/test/mapped/a.bam +samtools index a.bam +# samtools flagstat a.bam > a_ref.flagstat + + +# Download test data from nf-core module + +wget https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam +wget https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai +# samtools flagstat test.paired_end.sorted.bam > test_ref.paired_end.sorted.flagstat \ No newline at end of file diff --git a/src/samtools/samtools_flagstat/test_data/test.paired_end.sorted.bam b/src/samtools/samtools_flagstat/test_data/test.paired_end.sorted.bam new file mode 100644 index 00000000..85cccf14 Binary files /dev/null and b/src/samtools/samtools_flagstat/test_data/test.paired_end.sorted.bam differ diff --git a/src/samtools/samtools_flagstat/test_data/test.paired_end.sorted.bam.bai b/src/samtools/samtools_flagstat/test_data/test.paired_end.sorted.bam.bai new file mode 100644 index 00000000..0c6d5a96 Binary files /dev/null and b/src/samtools/samtools_flagstat/test_data/test.paired_end.sorted.bam.bai differ diff --git a/src/samtools/samtools_flagstat/test_data/test_ref.paired_end.sorted.flagstat b/src/samtools/samtools_flagstat/test_data/test_ref.paired_end.sorted.flagstat new file mode 100644 index 00000000..4028563d --- /dev/null +++ b/src/samtools/samtools_flagstat/test_data/test_ref.paired_end.sorted.flagstat @@ -0,0 +1,16 @@ +200 + 0 in total (QC-passed reads + QC-failed reads) +200 + 0 primary +0 + 0 secondary +0 + 0 supplementary +0 + 0 duplicates +0 + 0 primary duplicates +197 + 0 mapped (98.50% : N/A) +197 + 0 primary mapped (98.50% : N/A) +200 + 0 paired in sequencing +100 + 0 read1 +100 + 0 read2 +192 + 0 properly paired (96.00% : N/A) +194 + 0 with itself and mate mapped +3 + 0 singletons (1.50% : N/A) +0 + 0 with mate mapped to a different chr +0 + 0 with mate mapped to a different chr (mapQ>=5)