Skip to content

Commit

Permalink
Merge branch 'main' into samtools_sort
Browse files Browse the repository at this point in the history
  • Loading branch information
emmarousseau authored Apr 4, 2024
2 parents f6cdcfb + 1200bc3 commit 69f3d17
Show file tree
Hide file tree
Showing 24 changed files with 314 additions and 4 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
- `salmon/salmon_quant`: Transcript quantification from RNA-seq data (PR #24).

* `samtools`:
- `samtools/flagstat`: Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type (PR #31).
- `samtools/idxstats`: Reports alignment summary statistics for a SAM/BAM/CRAM file (PR #32).
- `samtools/samtools_sort`: Sort SAM/BAM/CRAM files (PR #36).

## MAJOR CHANGES
Expand Down
8 changes: 4 additions & 4 deletions src/gffread/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

set -e

test_output_dir="${meta_resources_dir}test_data/test_output"
test_dir="${meta_resources_dir}test_data"
expected_output_dir="${meta_resources_dir}test_data/output"
test_output_dir="${meta_resources_dir}/test_data/test_output"
test_dir="${meta_resources_dir}/test_data"
expected_output_dir="${meta_resources_dir}/test_data/output"

mkdir -p "$test_output_dir"

Expand Down Expand Up @@ -108,4 +108,4 @@ rm -r "$test_output_dir"

echo "> All tests successful"

exit 0
exit 0
51 changes: 51 additions & 0 deletions src/samtools/samtools_flagstat/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Viash component configuration for `samtools flagstat`.
name: samtools_flagstat
namespace: samtools
description: Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type.
keywords: [ stats, mapping, counts, bam, sam, cram ]
links:
  homepage: https://www.htslib.org/
  documentation: https://www.htslib.org/doc/samtools-flagstat.html
  repository: https://github.com/samtools/samtools
references:
  doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
license: MIT/Expat

argument_groups:
  - name: Inputs
    arguments:
      - name: --bam
        type: file
        description: |
          BAM input file.
      # NOTE(review): script.sh only passes --bam to samtools; this value is
      # accepted but not referenced there.
      - name: --bai
        type: file
        description: |
          BAM index file.
  - name: Outputs
    arguments:
      - name: --output
        type: file
        description: |
          File containing samtools flagstat output.
        direction: output
        example: output.flagstat

resources:
  - type: bash_script
    path: script.sh
test_resources:
  - type: bash_script
    path: test.sh
  - type: file
    path: test_data
engines:
  - type: docker
    image: quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1
    setup:
      # Store the samtools and htslib version lines, rewritten as
      # "<name>: <version>", in /var/software_versions.txt.
      - type: docker
        run: |
          samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \
          sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt
runners:
  - type: executable
  - type: nextflow
13 changes: 13 additions & 0 deletions src/samtools/samtools_flagstat/help.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
```sh
samtools flagstat --help
```
Usage: samtools flagstat [options] <in.bam>
--input-fmt-option OPT[=VAL]
Specify a single input file format option in the form
of OPTION or OPTION=VALUE
-@, --threads INT
Number of additional threads to use [0]
--verbosity INT
Set level of verbosity
-O, --output-fmt FORMAT[,OPT[=VAL]]...
Specify output format (json, tsv)
11 changes: 11 additions & 0 deletions src/samtools/samtools_flagstat/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

## VIASH START
## VIASH END

set -e

# Run flagstat on the input alignment file and store the report at the
# requested output path.
samtools flagstat "$par_bam" > "$par_output"

47 changes: 47 additions & 0 deletions src/samtools/samtools_flagstat/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash

# Integration test for the samtools_flagstat component: runs the built
# executable on each bundled BAM file and compares the generated report with
# the reference report stored in test_data.

# Abort on the first failing command so a broken run can never reach the
# final success message.
set -e

test_dir="${meta_resources_dir}/test_data"
echo ">>> Testing $meta_functionality_name"

"$meta_executable" \
  --bam "$test_dir/a.bam" \
  --bai "$test_dir/a.bam.bai" \
  --output "$test_dir/a.flagstat"

echo ">>> Checking whether output exists"
if [ ! -f "$test_dir/a.flagstat" ]; then
  echo "File 'a.flagstat' does not exist!"
  exit 1
fi

echo ">>> Checking whether output is non-empty"
if [ ! -s "$test_dir/a.flagstat" ]; then
  echo "File 'a.flagstat' is empty!"
  exit 1
fi

echo ">>> Checking whether output is correct"
# Braces (not parentheses) keep `exit 1` in the current shell; inside a
# subshell it would only end the subshell and the test would carry on.
diff "$test_dir/a.flagstat" "$test_dir/a_ref.flagstat" || {
  echo "Output file a.flagstat does not match expected output"
  exit 1
}

rm "$test_dir/a.flagstat"

############################################################################################

echo ">>> Testing $meta_functionality_name with singletons in the input"

"$meta_executable" \
  --bam "$test_dir/test.paired_end.sorted.bam" \
  --bai "$test_dir/test.paired_end.sorted.bam.bai" \
  --output "$test_dir/test.paired_end.sorted.flagstat"

echo ">>> Checking whether output exists"
if [ ! -f "$test_dir/test.paired_end.sorted.flagstat" ]; then
  echo "File 'test.paired_end.sorted.flagstat' does not exist!"
  exit 1
fi

echo ">>> Checking whether output is non-empty"
if [ ! -s "$test_dir/test.paired_end.sorted.flagstat" ]; then
  echo "File 'test.paired_end.sorted.flagstat' is empty!"
  exit 1
fi

echo ">>> Checking whether output is correct"
diff "$test_dir/test.paired_end.sorted.flagstat" "$test_dir/test_ref.paired_end.sorted.flagstat" || {
  echo "Output file test.paired_end.sorted.flagstat does not match expected output"
  exit 1
}

rm "$test_dir/test.paired_end.sorted.flagstat"



echo "All tests succeeded!"
exit 0
Binary file added src/samtools/samtools_flagstat/test_data/a.bam
Binary file not shown.
Binary file added src/samtools/samtools_flagstat/test_data/a.bam.bai
Binary file not shown.
16 changes: 16 additions & 0 deletions src/samtools/samtools_flagstat/test_data/a_ref.flagstat
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
6 + 0 in total (QC-passed reads + QC-failed reads)
6 + 0 primary
0 + 0 secondary
0 + 0 supplementary
0 + 0 duplicates
0 + 0 primary duplicates
6 + 0 mapped (100.00% : N/A)
6 + 0 primary mapped (100.00% : N/A)
6 + 0 paired in sequencing
3 + 0 read1
3 + 0 read2
6 + 0 properly paired (100.00% : N/A)
6 + 0 with itself and mate mapped
0 + 0 singletons (0.00% : N/A)
0 + 0 with mate mapped to a different chr
0 + 0 with mate mapped to a different chr (mapQ>=5)
14 changes: 14 additions & 0 deletions src/samtools/samtools_flagstat/test_data/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash

# Fetches the BAM fixtures for the flagstat tests into the current directory.
# The commented-out samtools commands are presumably how the *_ref reports
# were generated; they are kept for reference only.

# Fixture from the snakemake wrapper (pinned commit); its index is built locally.
wget https://raw.githubusercontent.com/snakemake/snakemake-wrappers/3a4f7004281efc176fd9af732ad88d00c47d432d/bio/samtools/flagstat/test/mapped/a.bam
samtools index a.bam
# samtools flagstat a.bam > a_ref.flagstat

# Fixtures from the nf-core module test datasets: the BAM and its index.
nfcore_urls=(
  https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam
  https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai
)
for url in "${nfcore_urls[@]}"; do
  wget "$url"
done
# samtools flagstat test.paired_end.sorted.bam > test_ref.paired_end.sorted.flagstat
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
200 + 0 in total (QC-passed reads + QC-failed reads)
200 + 0 primary
0 + 0 secondary
0 + 0 supplementary
0 + 0 duplicates
0 + 0 primary duplicates
197 + 0 mapped (98.50% : N/A)
197 + 0 primary mapped (98.50% : N/A)
200 + 0 paired in sequencing
100 + 0 read1
100 + 0 read2
192 + 0 properly paired (96.00% : N/A)
194 + 0 with itself and mate mapped
3 + 0 singletons (1.50% : N/A)
0 + 0 with mate mapped to a different chr
0 + 0 with mate mapped to a different chr (mapQ>=5)
53 changes: 53 additions & 0 deletions src/samtools/samtools_idxstats/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Viash component configuration for `samtools idxstats`.
name: samtools_idxstats
namespace: samtools
description: Reports alignment summary statistics for a BAM file.
keywords: [stats, mapping, counts, chromosome, bam, sam, cram]
links:
  homepage: https://www.htslib.org/
  documentation: https://www.htslib.org/doc/samtools-idxstats.html
  repository: https://github.com/samtools/samtools
references:
  # A list of two DOIs (a bare comma-separated value would be read as one string).
  doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
license: MIT/Expat

argument_groups:
  - name: Inputs
    arguments:
      - name: "--bam"
        type: file
        description: BAM input file.
      # NOTE(review): script.sh only passes --bam to samtools; --bai and
      # --fasta are accepted but not referenced there.
      - name: "--bai"
        type: file
        description: BAM index file.
      - name: "--fasta"
        type: file
        description: Reference file the CRAM was created with (optional).
  - name: Outputs
    arguments:
      - name: "--output"
        type: file
        description: |
          File containing samtools idxstats output in tab-delimited format.
        # Declared as an output so the file is not expected to exist before
        # the component runs.
        direction: output
        required: true
        example: output.idxstats

resources:
  - type: bash_script
    path: script.sh
test_resources:
  - type: bash_script
    path: test.sh
  - type: file
    path: test_data
engines:
  - type: docker
    image: quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1
    setup:
      # Store the samtools and htslib version lines, rewritten as
      # "<name>: <version>", in /var/software_versions.txt.
      - type: docker
        run: |
          samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \
          sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt
runners:
  - type: executable
  - type: nextflow
12 changes: 12 additions & 0 deletions src/samtools/samtools_idxstats/help.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
```
samtools idxstats
```

Usage: samtools idxstats [options] <in.bam>
--input-fmt-option OPT[=VAL]
Specify a single input file format option in the form
of OPTION or OPTION=VALUE
-@, --threads INT
Number of additional threads to use [0]
--verbosity INT
Set level of verbosity
8 changes: 8 additions & 0 deletions src/samtools/samtools_idxstats/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash

## VIASH START
## VIASH END

set -e

# Write the idxstats report for the input BAM to the requested output file.
samtools idxstats \
  "$par_bam" \
  > "$par_output"
49 changes: 49 additions & 0 deletions src/samtools/samtools_idxstats/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/bash

# Integration test for the samtools_idxstats component: runs the built
# executable on each bundled BAM file and compares the generated report with
# the reference report stored in test_data.

# Abort on the first failing command so a broken run can never reach the
# final success message.
set -e

test_dir="${meta_resources_dir}/test_data"
echo ">>> Testing $meta_functionality_name"

"$meta_executable" \
  --bam "$test_dir/a.sorted.bam" \
  --bai "$test_dir/a.sorted.bam.bai" \
  --output "$test_dir/a.sorted.idxstats"

echo ">>> Checking whether output exists"
if [ ! -f "$test_dir/a.sorted.idxstats" ]; then
  echo "File 'a.sorted.idxstats' does not exist!"
  exit 1
fi

echo ">>> Checking whether output is non-empty"
if [ ! -s "$test_dir/a.sorted.idxstats" ]; then
  echo "File 'a.sorted.idxstats' is empty!"
  exit 1
fi

echo ">>> Checking whether output is correct"
# Braces (not parentheses) keep `exit 1` in the current shell; inside a
# subshell it would only end the subshell and the test would carry on.
diff "$test_dir/a.sorted.idxstats" "$test_dir/a_ref.sorted.idxstats" || {
  echo "Output file a.sorted.idxstats does not match expected output"
  exit 1
}

rm "$test_dir/a.sorted.idxstats"

############################################################################################

echo ">>> Testing $meta_functionality_name with singletons in the input"

"$meta_executable" \
  --bam "$test_dir/test.paired_end.sorted.bam" \
  --bai "$test_dir/test.paired_end.sorted.bam.bai" \
  --output "$test_dir/test.paired_end.sorted.idxstats"

echo ">>> Checking whether output exists"
if [ ! -f "$test_dir/test.paired_end.sorted.idxstats" ]; then
  echo "File 'test.paired_end.sorted.idxstats' does not exist!"
  exit 1
fi

echo ">>> Checking whether output is non-empty"
if [ ! -s "$test_dir/test.paired_end.sorted.idxstats" ]; then
  echo "File 'test.paired_end.sorted.idxstats' is empty!"
  exit 1
fi

echo ">>> Checking whether output is correct"
diff "$test_dir/test.paired_end.sorted.idxstats" "$test_dir/test_ref.paired_end.sorted.idxstats" || {
  echo "Output file test.paired_end.sorted.idxstats does not match expected output"
  exit 1
}

rm "$test_dir/test.paired_end.sorted.idxstats"

############################################################################################

echo "All tests succeeded!"
exit 0
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
xx 20 6 0
* 0 0 0
14 changes: 14 additions & 0 deletions src/samtools/samtools_idxstats/test_data/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash

# Fetches the BAM fixtures used by the samtools_idxstats tests.
# Run from the repository root: the destination directory below is relative
# to it. The commented-out samtools commands are presumably how the *_ref
# reports were generated; they are kept for reference only.

# Stop at the first failed download or copy instead of leaving partial data.
set -e

# All fixtures go next to the component's test.sh.
out_dir="src/samtools/samtools_idxstats/test_data"
mkdir -p "$out_dir"

# download test data from snakemake wrapper
# The clone is reused when /tmp/idxstats_source already exists.
if [ ! -d /tmp/idxstats_source ]; then
  git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers.git /tmp/idxstats_source
fi

cp -r /tmp/idxstats_source/bio/samtools/idxstats/test/mapped/* "$out_dir"
# samtools idxstats a.sorted.bam > a_ref.sorted.idxstats

# download test data from nf-core module
wget -P "$out_dir" https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam
wget -P "$out_dir" https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai
# samtools idxstats test.paired_end.sorted.bam > test_ref.paired_end.sorted.idxstats
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
MT192765.1 29829 197 3
* 0 0 0

0 comments on commit 69f3d17

Please sign in to comment.