diff --git a/CHANGELOG.md b/CHANGELOG.md index ee614b2b..a4f6be08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,8 @@ - `salmon/salmon_quant`: Transcript quantification from RNA-seq data (PR #24). * `samtools`: + - `samtools/flagstat`: Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type (PR #31). + - `samtools/idxstats`: Reports alignment summary statistics for a SAM/BAM/CRAM file (PR #32). - `samtools/samtools_sort`: Sort SAM/BAM/CRAM files (PR #36). ## MAJOR CHANGES diff --git a/src/gffread/test.sh b/src/gffread/test.sh index 6d38012b..326fce50 100755 --- a/src/gffread/test.sh +++ b/src/gffread/test.sh @@ -5,9 +5,9 @@ set -e -test_output_dir="${meta_resources_dir}test_data/test_output" -test_dir="${meta_resources_dir}test_data" -expected_output_dir="${meta_resources_dir}test_data/output" +test_output_dir="${meta_resources_dir}/test_data/test_output" +test_dir="${meta_resources_dir}/test_data" +expected_output_dir="${meta_resources_dir}/test_data/output" mkdir -p "$test_output_dir" @@ -108,4 +108,4 @@ rm -r "$test_output_dir" echo "> All tests successful" -exit 0 \ No newline at end of file +exit 0 diff --git a/src/samtools/samtools_flagstat/config.vsh.yaml b/src/samtools/samtools_flagstat/config.vsh.yaml new file mode 100644 index 00000000..20a5e3cd --- /dev/null +++ b/src/samtools/samtools_flagstat/config.vsh.yaml @@ -0,0 +1,51 @@ +name: samtools_flagstat +namespace: samtools +description: Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type. +keywords: [ stats, mapping, counts, bam, sam, cram ] +links: + homepage: https://www.htslib.org/ + documentation: https://www.htslib.org/doc/samtools-flagstat.html + repository: https://github.com/samtools/samtools +references: + doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] +license: MIT/Expat + +argument_groups: + - name: Inputs + arguments: + - name: --bam + type: file + description: | + BAM input files. + - name: --bai + type: file + description: | + BAM index file. + - name: Outputs + arguments: + - name: --output + type: file + description: | + File containing samtools stats output. + direction: output + example: output.flagstat + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1 + setup: + - type: docker + run: | + samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \ + sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt +runners: +- type: executable +- type: nextflow \ No newline at end of file diff --git a/src/samtools/samtools_flagstat/help.txt b/src/samtools/samtools_flagstat/help.txt new file mode 100644 index 00000000..fe54d20c --- /dev/null +++ b/src/samtools/samtools_flagstat/help.txt @@ -0,0 +1,13 @@ +```sh +samtools flagstat --help +``` +Usage: samtools flagstat [options] + --input-fmt-option OPT[=VAL] + Specify a single input file format option in the form + of OPTION or OPTION=VALUE + -@, --threads INT + Number of additional threads to use [0] + --verbosity INT + Set level of verbosity + -O, --output-fmt FORMAT[,OPT[=VAL]]... + Specify output format (json, tsv) \ No newline at end of file diff --git a/src/samtools/samtools_flagstat/script.sh b/src/samtools/samtools_flagstat/script.sh new file mode 100644 index 00000000..beac3703 --- /dev/null +++ b/src/samtools/samtools_flagstat/script.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +set -e + +samtools flagstat \ + "$par_bam" \ + > "$par_output" + \ No newline at end of file diff --git a/src/samtools/samtools_flagstat/test.sh b/src/samtools/samtools_flagstat/test.sh new file mode 100644 index 00000000..647c64af --- /dev/null +++ b/src/samtools/samtools_flagstat/test.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +test_dir="${meta_resources_dir}/test_data" +echo ">>> Testing $meta_functionality_name" + +"$meta_executable" \ + --bam "$test_dir/a.bam" \ + --bai "$test_dir/a.bam.bai" \ + --output "$test_dir/a.flagstat" + +echo ">>> Checking whether output exists" +[ ! -f "$test_dir/a.flagstat" ] && echo "File 'a.flagstat' does not exist!" && exit 1 + +echo ">>> Checking whether output is non-empty" +[ ! -s "$test_dir/a.flagstat" ] && echo "File 'a.flagstat' is empty!" && exit 1 + +echo ">>> Checking whether output is correct" +diff "$test_dir/a.flagstat" "$test_dir/a_ref.flagstat" || \ + (echo "Output file a.flagstat does not match expected output" && exit 1) + +rm "$test_dir/a.flagstat" + +############################################################################################ + +echo ">>> Testing $meta_functionality_name with singletons in the input" + +"$meta_executable" \ + --bam "$test_dir/test.paired_end.sorted.bam" \ + --bai "$test_dir/test.paired_end.sorted.bam.bai" \ + --output "$test_dir/test.paired_end.sorted.flagstat" + +echo ">>> Checking whether output exists" +[ ! -f "$test_dir/test.paired_end.sorted.flagstat" ] && echo "File 'test.paired_end.sorted.flagstat' does not exist!" && exit 1 + +echo ">>> Checking whether output is non-empty" +[ ! -s "$test_dir/test.paired_end.sorted.flagstat" ] && echo "File 'test.paired_end.sorted.flagstat' is empty!" && exit 1 + +echo ">>> Checking whether output is correct" +diff "$test_dir/test.paired_end.sorted.flagstat" "$test_dir/test_ref.paired_end.sorted.flagstat" || \ + (echo "Output file test.paired_end.sorted.flagstat does not match expected output" && exit 1) + +rm "$test_dir/test.paired_end.sorted.flagstat" + + + +echo "All tests succeeded!" +exit 0 \ No newline at end of file diff --git a/src/samtools/samtools_flagstat/test_data/a.bam b/src/samtools/samtools_flagstat/test_data/a.bam new file mode 100644 index 00000000..dba1268a Binary files /dev/null and b/src/samtools/samtools_flagstat/test_data/a.bam differ diff --git a/src/samtools/samtools_flagstat/test_data/a.bam.bai b/src/samtools/samtools_flagstat/test_data/a.bam.bai new file mode 100644 index 00000000..12f5f510 Binary files /dev/null and b/src/samtools/samtools_flagstat/test_data/a.bam.bai differ diff --git a/src/samtools/samtools_flagstat/test_data/a_ref.flagstat b/src/samtools/samtools_flagstat/test_data/a_ref.flagstat new file mode 100644 index 00000000..5d8b9a73 --- /dev/null +++ b/src/samtools/samtools_flagstat/test_data/a_ref.flagstat @@ -0,0 +1,16 @@ +6 + 0 in total (QC-passed reads + QC-failed reads) +6 + 0 primary +0 + 0 secondary +0 + 0 supplementary +0 + 0 duplicates +0 + 0 primary duplicates +6 + 0 mapped (100.00% : N/A) +6 + 0 primary mapped (100.00% : N/A) +6 + 0 paired in sequencing +3 + 0 read1 +3 + 0 read2 +6 + 0 properly paired (100.00% : N/A) +6 + 0 with itself and mate mapped +0 + 0 singletons (0.00% : N/A) +0 + 0 with mate mapped to a different chr +0 + 0 with mate mapped to a different chr (mapQ>=5) diff --git a/src/samtools/samtools_flagstat/test_data/script.sh b/src/samtools/samtools_flagstat/test_data/script.sh new file mode 100755 index 00000000..fc32b48e --- /dev/null +++ b/src/samtools/samtools_flagstat/test_data/script.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Download test data from snakemake wrapper + +wget https://raw.githubusercontent.com/snakemake/snakemake-wrappers/3a4f7004281efc176fd9af732ad88d00c47d432d/bio/samtools/flagstat/test/mapped/a.bam +samtools index a.bam +# samtools flagstat a.bam > a_ref.flagstat + + +# Download test data from nf-core module + +wget https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam +wget https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai +# samtools flagstat test.paired_end.sorted.bam > test_ref.paired_end.sorted.flagstat \ No newline at end of file diff --git a/src/samtools/samtools_flagstat/test_data/test.paired_end.sorted.bam b/src/samtools/samtools_flagstat/test_data/test.paired_end.sorted.bam new file mode 100644 index 00000000..85cccf14 Binary files /dev/null and b/src/samtools/samtools_flagstat/test_data/test.paired_end.sorted.bam differ diff --git a/src/samtools/samtools_flagstat/test_data/test.paired_end.sorted.bam.bai b/src/samtools/samtools_flagstat/test_data/test.paired_end.sorted.bam.bai new file mode 100644 index 00000000..0c6d5a96 Binary files /dev/null and b/src/samtools/samtools_flagstat/test_data/test.paired_end.sorted.bam.bai differ diff --git a/src/samtools/samtools_flagstat/test_data/test_ref.paired_end.sorted.flagstat b/src/samtools/samtools_flagstat/test_data/test_ref.paired_end.sorted.flagstat new file mode 100644 index 00000000..4028563d --- /dev/null +++ b/src/samtools/samtools_flagstat/test_data/test_ref.paired_end.sorted.flagstat @@ -0,0 +1,16 @@ +200 + 0 in total (QC-passed reads + QC-failed reads) +200 + 0 primary +0 + 0 secondary +0 + 0 supplementary +0 + 0 duplicates +0 + 0 primary duplicates +197 + 0 mapped (98.50% : N/A) +197 + 0 primary mapped (98.50% : N/A) +200 + 0 paired in sequencing +100 + 0 read1 +100 + 0 read2 +192 + 0 properly paired (96.00% : N/A) +194 + 0 with itself and mate mapped +3 + 0 singletons (1.50% : N/A) +0 + 0 with mate mapped to a different chr +0 + 0 with mate mapped to a different chr (mapQ>=5) diff --git a/src/samtools/samtools_idxstats/config.vsh.yaml b/src/samtools/samtools_idxstats/config.vsh.yaml new file mode 100644 index 00000000..781bd8be --- /dev/null +++ b/src/samtools/samtools_idxstats/config.vsh.yaml @@ -0,0 +1,53 @@ +name: samtools_idxstats +namespace: samtools +description: Reports alignment summary statistics for a BAM file. +keywords: [stats, mapping, counts, chromosome, bam, sam, cram] +links: + homepage: https://www.htslib.org/ + documentation: https://www.htslib.org/doc/samtools-idxstats.html + repository: https://github.com/samtools/samtools +references: + doi: 10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008 +license: MIT/Expat + +argument_groups: + - name: Inputs + arguments: + - name: "--bam" + type: file + description: BAM input file. + - name: "--bai" + type: file + description: BAM index file. + - name: "--fasta" + type: file + description: Reference file the CRAM was created with (optional). + - name: Outputs + arguments: + - name: "--output" + type: file + description: | + File containing samtools stats output in tab-delimited format. + required: true + must_exist: false + example: output.idxstats + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1 + setup: + - type: docker + run: | + samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \ + sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt +runners: +- type: executable +- type: nextflow \ No newline at end of file diff --git a/src/samtools/samtools_idxstats/help.txt b/src/samtools/samtools_idxstats/help.txt new file mode 100644 index 00000000..7b63ab17 --- /dev/null +++ b/src/samtools/samtools_idxstats/help.txt @@ -0,0 +1,12 @@ +``` +samtools idxstats +``` + +Usage: samtools idxstats [options] + --input-fmt-option OPT[=VAL] + Specify a single input file format option in the form + of OPTION or OPTION=VALUE + -@, --threads INT + Number of additional threads to use [0] + --verbosity INT + Set level of verbosity \ No newline at end of file diff --git a/src/samtools/samtools_idxstats/script.sh b/src/samtools/samtools_idxstats/script.sh new file mode 100644 index 00000000..9f8d4af4 --- /dev/null +++ b/src/samtools/samtools_idxstats/script.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +set -e + +samtools idxstats "$par_bam" > "$par_output" \ No newline at end of file diff --git a/src/samtools/samtools_idxstats/test.sh b/src/samtools/samtools_idxstats/test.sh new file mode 100644 index 00000000..1459ec08 --- /dev/null +++ b/src/samtools/samtools_idxstats/test.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +test_dir="${meta_resources_dir}/test_data" +echo ">>> Testing $meta_functionality_name" + +"$meta_executable" \ + --bam "$test_dir/a.sorted.bam" \ + --bai "$test_dir/a.sorted.bam.bai" \ + --output "$test_dir/a.sorted.idxstats" + +echo ">>> Checking whether output exists" +[ ! -f "$test_dir/a.sorted.idxstats" ] && echo "File 'a.sorted.idxstats' does not exist!" && exit 1 + +echo ">>> Checking whether output is non-empty" +[ ! -s "$test_dir/a.sorted.idxstats" ] && echo "File 'a.sorted.idxstats' is empty!" && exit 1 + +echo ">>> Checking whether output is correct" +diff "$test_dir/a.sorted.idxstats" "$test_dir/a_ref.sorted.idxstats" || \ + (echo "Output file a.sorted.idxstats does not match expected output" && exit 1) + +rm "$test_dir/a.sorted.idxstats" + +############################################################################################ + +echo ">>> Testing $meta_functionality_name with singletons in the input" + +"$meta_executable" \ + --bam "$test_dir/test.paired_end.sorted.bam" \ + --bai "$test_dir/test.paired_end.sorted.bam.bai" \ + --output "$test_dir/test.paired_end.sorted.idxstats" + +echo ">>> Checking whether output exists" +[ ! -f "$test_dir/test.paired_end.sorted.idxstats" ] && \ + echo "File 'test.paired_end.sorted.idxstats' does not exist!" && exit 1 + +echo ">>> Checking whether output is non-empty" +[ ! -s "$test_dir/test.paired_end.sorted.idxstats" ] && \ + echo "File 'test.paired_end.sorted.idxstats' is empty!" && exit 1 + +echo ">>> Checking whether output is correct" +diff "$test_dir/test.paired_end.sorted.idxstats" "$test_dir/test_ref.paired_end.sorted.idxstats" || \ + (echo "Output file test.paired_end.sorted.idxstats does not match expected output" && exit 1) + +rm "$test_dir/test.paired_end.sorted.idxstats" + +############################################################################################ + +echo "All tests succeeded!" +exit 0 \ No newline at end of file diff --git a/src/samtools/samtools_idxstats/test_data/a.sorted.bam b/src/samtools/samtools_idxstats/test_data/a.sorted.bam new file mode 100644 index 00000000..1c81d7cd Binary files /dev/null and b/src/samtools/samtools_idxstats/test_data/a.sorted.bam differ diff --git a/src/samtools/samtools_idxstats/test_data/a.sorted.bam.bai b/src/samtools/samtools_idxstats/test_data/a.sorted.bam.bai new file mode 100644 index 00000000..4f08f5d5 Binary files /dev/null and b/src/samtools/samtools_idxstats/test_data/a.sorted.bam.bai differ diff --git a/src/samtools/samtools_idxstats/test_data/a_ref.sorted.idxstats b/src/samtools/samtools_idxstats/test_data/a_ref.sorted.idxstats new file mode 100644 index 00000000..27862419 --- /dev/null +++ b/src/samtools/samtools_idxstats/test_data/a_ref.sorted.idxstats @@ -0,0 +1,2 @@ +xx 20 6 0 +* 0 0 0 diff --git a/src/samtools/samtools_idxstats/test_data/script.sh b/src/samtools/samtools_idxstats/test_data/script.sh new file mode 100755 index 00000000..0e28a4c6 --- /dev/null +++ b/src/samtools/samtools_idxstats/test_data/script.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# dowload test data from snakemake wrapper +if [ ! -d /tmp/idxstats_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers.git /tmp/idxstats_source +fi + +cp -r /tmp/idxstats_source/bio/samtools/idxstats/test/mapped/* src/samtools/idxstats/test_data +# samtools idxstats a.sorted.bam > a.sorted.idxstats + +# dowload test data from nf-core module +wget https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam +wget https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai +# samtools idxstats test.paired_end.sorted.bam > test_ref.paired_end.sorted.idxstats \ No newline at end of file diff --git a/src/samtools/samtools_idxstats/test_data/test.paired_end.sorted.bam b/src/samtools/samtools_idxstats/test_data/test.paired_end.sorted.bam new file mode 100644 index 00000000..85cccf14 Binary files /dev/null and b/src/samtools/samtools_idxstats/test_data/test.paired_end.sorted.bam differ diff --git a/src/samtools/samtools_idxstats/test_data/test.paired_end.sorted.bam.bai b/src/samtools/samtools_idxstats/test_data/test.paired_end.sorted.bam.bai new file mode 100644 index 00000000..0c6d5a96 Binary files /dev/null and b/src/samtools/samtools_idxstats/test_data/test.paired_end.sorted.bam.bai differ diff --git a/src/samtools/samtools_idxstats/test_data/test_ref.paired_end.sorted.idxstats b/src/samtools/samtools_idxstats/test_data/test_ref.paired_end.sorted.idxstats new file mode 100644 index 00000000..77bc11b8 --- /dev/null +++ b/src/samtools/samtools_idxstats/test_data/test_ref.paired_end.sorted.idxstats @@ -0,0 +1,2 @@ +MT192765.1 29829 197 3 +* 0 0 0