forked from viash-hub/biobox
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add tests and reference outputs, update changelog * final touches, fix script * update changelog * Minor changes, paths, config and script --------- Co-authored-by: Robrecht Cannoodt <[email protected]>
- Loading branch information
1 parent
3ada28a
commit 8935a78
Showing
11 changed files
with
4,938 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
name: samtools_stats | ||
namespace: samtools | ||
description: Reports alignment summary statistics for a BAM file. | ||
keywords: [statistics, counts, bam, sam, cram] | ||
links: | ||
homepage: https://www.htslib.org/ | ||
documentation: https://www.htslib.org/doc/samtools-idxstats.html | ||
repository: https://github.com/samtools/samtools | ||
references: | ||
doi: 10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008 | ||
license: MIT/Expat | ||
|
||
argument_groups: | ||
- name: Inputs | ||
arguments: | ||
- name: --input | ||
type: file | ||
description: | | ||
Input file. | ||
required: true | ||
must_exist: true | ||
- name: --bai | ||
type: file | ||
description: | | ||
Index file. | ||
- name: --fasta | ||
type: file | ||
description: | | ||
Reference file the CRAM was created with. | ||
- name: --coverage | ||
alternatives: -c | ||
type: integer | ||
description: | | ||
Coverage distribution min,max,step [1,1000,1]. | ||
multiple: true | ||
multiple_sep: ',' | ||
- name: --remove_dups | ||
alternatives: -d | ||
type: boolean_true | ||
description: | | ||
Exclude from statistics reads marked as duplicates. | ||
- name: --customized_index_file | ||
alternatives: -X | ||
type: boolean_true | ||
description: | | ||
Use a customized index file. | ||
- name: --required_flag | ||
alternatives: -f | ||
type: string | ||
description: | | ||
Required flag, 0 for unset. See also `samtools flags`. | ||
default: "0" | ||
- name: --filtering_flag | ||
alternatives: -F | ||
type: string | ||
description: | | ||
Filtering flag, 0 for unset. See also `samtools flags`. | ||
default: "0" | ||
- name: --GC_depth | ||
type: double | ||
description: | | ||
The size of GC-depth bins (decreasing bin size increases memory requirement). | ||
default: 20000.0 | ||
- name: --insert_size | ||
alternatives: -i | ||
type: integer | ||
description: | | ||
Maximum insert size. | ||
default: 8000 | ||
- name: --id | ||
alternatives: -I | ||
type: string | ||
description: | | ||
Include only listed read group or sample name. | ||
- name: --read_length | ||
alternatives: -l | ||
type: integer | ||
description: | | ||
Include in the statistics only reads with the given read length. | ||
default: -1 | ||
- name: --most_inserts | ||
alternatives: -m | ||
type: double | ||
description: | | ||
Report only the main part of inserts. | ||
default: 0.99 | ||
- name: --split_prefix | ||
alternatives: -P | ||
type: string | ||
description: | | ||
Path or string prefix for filepaths output by --split (default is input filename). | ||
- name: --trim_quality | ||
alternatives: -q | ||
type: integer | ||
description: | | ||
The BWA trimming parameter. | ||
default: 0 | ||
- name: --ref_seq | ||
alternatives: -r | ||
type: file | ||
description: | | ||
Reference sequence (required for GC-depth and mismatches-per-cycle calculation). | ||
- name: --split | ||
alternatives: -S | ||
type: string | ||
description: | | ||
Also write statistics to separate files split by tagged field. | ||
- name: --target_regions | ||
alternatives: -t | ||
type: file | ||
description: | | ||
Do stats in these regions only. Tab-delimited file chr,from,to, 1-based, inclusive. | ||
- name: --sparse | ||
alternatives: -x | ||
type: boolean_true | ||
description: | | ||
Suppress outputting IS rows where there are no insertions. | ||
- name: --remove_overlaps | ||
alternatives: -p | ||
type: boolean_true | ||
description: | | ||
Remove overlaps of paired-end reads from coverage and base count computations. | ||
- name: --cov_threshold | ||
alternatives: -g | ||
type: integer | ||
description: | | ||
Only bases with coverage above this value will be included in the target percentage computation. | ||
default: 0 | ||
- name: --input_fmt_option | ||
type: string | ||
description: | | ||
Specify a single input file format option in the form of OPTION or OPTION=VALUE. | ||
- name: --reference | ||
type: file | ||
description: | | ||
Reference sequence FASTA FILE. | ||
- name: Outputs | ||
arguments: | ||
- name: --output | ||
alternatives: -o | ||
type: file | ||
description: | | ||
Output file. | ||
default: "out.txt" | ||
required: true | ||
direction: output | ||
|
||
resources: | ||
- type: bash_script | ||
path: script.sh | ||
test_resources: | ||
- type: bash_script | ||
path: test.sh | ||
- type: file | ||
path: test_data | ||
engines: | ||
- type: docker | ||
image: quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1 | ||
setup: | ||
- type: docker | ||
run: | | ||
samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \ | ||
sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt | ||
runners: | ||
- type: executable | ||
- type: nextflow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
``` | ||
samtools stats -h | ||
``` | ||
|
||
Usage: samtools stats [OPTIONS] file.bam | ||
samtools stats [OPTIONS] file.bam chr:from-to | ||
Options: | ||
-c, --coverage <int>,<int>,<int> Coverage distribution min,max,step [1,1000,1] | ||
-d, --remove-dups Exclude from statistics reads marked as duplicates | ||
-X, --customized-index-file Use a customized index file | ||
-f, --required-flag <str|int> Required flag, 0 for unset. See also `samtools flags` [0] | ||
-F, --filtering-flag <str|int> Filtering flag, 0 for unset. See also `samtools flags` [0] | ||
--GC-depth <float> the size of GC-depth bins (decreasing bin size increases memory requirement) [2e4] | ||
-h, --help This help message | ||
-i, --insert-size <int> Maximum insert size [8000] | ||
-I, --id <string> Include only listed read group or sample name | ||
-l, --read-length <int> Include in the statistics only reads with the given read length [-1] | ||
-m, --most-inserts <float> Report only the main part of inserts [0.99] | ||
-P, --split-prefix <str> Path or string prefix for filepaths output by -S (default is input filename) | ||
-q, --trim-quality <int> The BWA trimming parameter [0] | ||
-r, --ref-seq <file> Reference sequence (required for GC-depth and mismatches-per-cycle calculation). | ||
-s, --sam Ignored (input format is auto-detected). | ||
-S, --split <tag> Also write statistics to separate files split by tagged field. | ||
-t, --target-regions <file> Do stats in these regions only. Tab-delimited file chr,from,to, 1-based, inclusive. | ||
-x, --sparse Suppress outputting IS rows where there are no insertions. | ||
-p, --remove-overlaps Remove overlaps of paired-end reads from coverage and base count computations. | ||
-g, --cov-threshold <int> Only bases with coverage above this value will be included in the target percentage computation [0] | ||
--input-fmt-option OPT[=VAL] | ||
Specify a single input file format option in the form | ||
of OPTION or OPTION=VALUE | ||
--reference FILE | ||
Reference sequence FASTA FILE [null] | ||
-@, --threads INT | ||
Number of additional threads to use [0] | ||
--verbosity INT | ||
Set level of verbosity |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/bin/bash | ||
|
||
## VIASH START | ||
## VIASH END | ||
|
||
set -e | ||
|
||
[[ "$par_remove_dups" == "false" ]] && unset par_remove_dups | ||
[[ "$par_customized_index_file" == "false" ]] && unset par_customized_index_file | ||
[[ "$par_sparse" == "false" ]] && unset par_sparse | ||
[[ "$par_remove_overlaps" == "false" ]] && unset par_remove_overlaps | ||
|
||
samtools stats \ | ||
${par_coverage:+-c "$par_coverage"} \ | ||
${par_remove_dups:+-d} \ | ||
${par_required_flag:+-f "$par_required_flag"} \ | ||
${par_filtering_flag:+-F "$par_filtering_flag"} \ | ||
${par_GC_depth:+--GC-depth "$par_GC_depth"} \ | ||
${par_insert_size:+-i "$par_insert_size"} \ | ||
${par_id:+-I "$par_id"} \ | ||
${par_read_length:+-l "$par_read_length"} \ | ||
${par_most_inserts:+-m "$par_most_inserts"} \ | ||
${par_split_prefix:+-P "$par_split_prefix"} \ | ||
${par_trim_quality:+-q "$par_trim_quality"} \ | ||
${par_ref_seq:+-r "$par_ref_seq"} \ | ||
${par_split:+-S "$par_split"} \ | ||
${par_target_regions:+-t "$par_target_regions"} \ | ||
${par_sparse:+-x} \ | ||
${par_remove_overlaps:+-p} \ | ||
${par_cov_threshold:+-g "$par_cov_threshold"} \ | ||
${par_input_fmt_option:+-O "$par_input_fmt_option"} \ | ||
${par_reference:+-R "$par_reference"} \ | ||
"$par_input" \ | ||
> "$par_output" | ||
|
||
exit 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#!/bin/bash | ||
|
||
test_dir="${meta_resources_dir}/test_data" | ||
|
||
############################################################################################ | ||
|
||
echo ">>> Test 1: $meta_functionality_name" | ||
"$meta_executable" \ | ||
--input "$test_dir/test.paired_end.sorted.bam" \ | ||
--bai "$test_dir/test.paired_end.sorted.bam.bai" \ | ||
--output "$test_dir/test.paired_end.sorted.txt" | ||
|
||
echo ">>> Checking whether output exists" | ||
[ ! -f "$test_dir/test.paired_end.sorted.txt" ] && echo "File 'test.paired_end.sorted.txt' does not exist!" && exit 1 | ||
|
||
echo ">>> Checking whether output is non-empty" | ||
[ ! -s "$test_dir/test.paired_end.sorted.txt" ] && echo "File 'test.paired_end.sorted.txt' is empty!" && exit 1 | ||
|
||
echo ">>> Checking whether output is correct" | ||
# compare using diff, ignoring the line stating the command that was passed. | ||
diff <(grep -v "^# The command" "$test_dir/test.paired_end.sorted.txt") \ | ||
<(grep -v "^# The command" "$test_dir/ref.paired_end.sorted.txt") || \ | ||
(echo "Output file ref.paired_end.sorted.txt does not match expected output" && exit 1) | ||
|
||
rm "$test_dir/test.paired_end.sorted.txt" | ||
|
||
############################################################################################ | ||
|
||
echo ">>> Test 2: $meta_functionality_name with --remove_dups" | ||
"$meta_executable" \ | ||
--remove_dups \ | ||
--input "$test_dir/test.paired_end.sorted.bam" \ | ||
--bai "$test_dir/test.paired_end.sorted.bam.bai" \ | ||
--output "$test_dir/test.d.paired_end.sorted.txt" | ||
|
||
echo ">>> Checking whether output exists" | ||
[ ! -f "$test_dir/ref.d.paired_end.sorted.txt" ] && echo "File 'ref.d.paired_end.sorted.txt' does not exist!" && exit 1 | ||
|
||
echo ">>> Checking whether output is non-empty" | ||
[ ! -s "$test_dir/ref.d.paired_end.sorted.txt" ] && echo "File 'ref.d.paired_end.sorted.txt' is empty!" && exit 1 | ||
|
||
echo ">>> Checking whether output is correct" | ||
# compare using diff, ignoring the line stating the command that was passed. | ||
diff <(grep -v "^# The command" "$test_dir/test.d.paired_end.sorted.txt") \ | ||
<(grep -v "^# The command" "$test_dir/ref.d.paired_end.sorted.txt") || \ | ||
(echo "Output file ref.d.paired_end.sorted.txt does not match expected output" && exit 1) | ||
|
||
rm "$test_dir/test.d.paired_end.sorted.txt" | ||
|
||
############################################################################################ | ||
|
||
echo ">>> Test 3: $meta_functionality_name with --remove_overlaps" | ||
"$meta_executable" \ | ||
--remove_overlaps \ | ||
--input "$test_dir/test.paired_end.sorted.bam" \ | ||
--bai "$test_dir/test.paired_end.sorted.bam.bai" \ | ||
--output "$test_dir/test.p.paired_end.sorted.txt" | ||
|
||
echo ">>> Checking whether output exists" | ||
[ ! -f "$test_dir/ref.p.paired_end.sorted.txt" ] && echo "File 'ref.p.paired_end.sorted.txt' does not exist!" && exit 1 | ||
|
||
echo ">>> Checking whether output is non-empty" | ||
[ ! -s "$test_dir/ref.p.paired_end.sorted.txt" ] && echo "File 'ref.p.paired_end.sorted.txt' is empty!" && exit 1 | ||
|
||
|
||
echo ">>> Checking whether output is correct" | ||
# compare using diff, ignoring the line stating the command that was passed. | ||
diff <(grep -v "^# The command" "$test_dir/test.p.paired_end.sorted.txt") \ | ||
<(grep -v "^# The command" "$test_dir/ref.p.paired_end.sorted.txt") || \ | ||
(echo "Output file ref.p.paired_end.sorted.txt does not match expected output" && exit 1) | ||
|
||
rm "$test_dir/test.p.paired_end.sorted.txt" | ||
|
||
############################################################################################ | ||
|
||
echo ">>> All tests passed successfully." | ||
|
||
exit 0 |
Oops, something went wrong.