Skip to content

Commit

Permalink
add more tests to samtools/index
Browse files Browse the repository at this point in the history
  • Loading branch information
emmarousseau committed Mar 29, 2024
1 parent 84c61b4 commit 0aecaa3
Show file tree
Hide file tree
Showing 10 changed files with 122 additions and 50 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
- `salmon/salmon_index`: Create a salmon index for the transcriptome to use Salmon in the mapping-based mode (PR #24).
- `salmon/salmon_quant`: Transcript quantification from RNA-seq data (PR #24).

* `samtools`:
- `samtools/index`: Index SAM/BAM/CRAM files.

## MAJOR CHANGES

## MINOR CHANGES
Expand Down
55 changes: 27 additions & 28 deletions src/samtools/index/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: samtools_index
namespace: samtools
description: Index BAM files
keywords: [stats, mapping, counts, chromosome, bam, sam, cram]
description: Index SAM/BAM/CRAM files
keywords: [index, bam, sam, cram]
links:
homepage: https://www.htslib.org/
documentation: https://www.htslib.org/doc/samtools-index.html
Expand All @@ -13,47 +13,46 @@ license: MIT/Expat
argument_groups:
- name: Inputs
arguments:
- name: --bam
alternatives: -b
- name: --input
type: file
description: BAM input file.
- name: "--bam_csi_index"
type: boolean
description: Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes.
description: Input file name
required: true
must_exist: true
- name: Outputs
arguments:
- name: --output_bai
alternatives: [ -o, --output ]
- name: --output
alternatives: -o
type: file
description: BAM index
direction: output
description: Output file name
required: true
must_exist: false
example: out.bam.bai
must_exist: false
- name: --output_csi
alternatives: [ -c, --csi ]
type: file
description: CSI index
direction: output
default: out.bam.csi
- name: --min-shift
alternatives: -m
type: integer
description: Create a CSI index, with a minimum interval size of 2^INT.
default: 0
- name: Options
arguments:
- name: --individual
alternatives: -M
- name: --bai
alternatives: -b
type: boolean_true
description: Generate BAM index
- name: --csi
alternatives: -c
type: boolean_true
description: |
Create a CSI index for BAM files instead of the traditional BAI
index. This will be required for genomes with larger chromosome
sizes.
- name: --min_shift
alternatives: -m
type: integer
description: |
Create a CSI index, with a minimum interval size of 2^INT.
- name: --multiple
alternatives: -M
type: boolean_true
description: |
Interpret all filename arguments as alignment files to
be indexed individually. (Without -M, filename arguments
are interpreted solely as per the second synopsis.)
resources:
- type: bash_script
path: script.sh
Expand Down
12 changes: 8 additions & 4 deletions src/samtools/index/help.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
samtools index
```

Usage: samtools index [-bc] [-m INT] <in.bam> [out.index]
Usage: samtools index -M [-bc] [-m INT] <in1.bam> <in2.bam>...
or: samtools index [-bc] [-m INT] <in.bam> [out.index]
Options:
-b Generate BAI-format index for BAM files [default]
-c Generate CSI-format index for BAM files
-m INT Set minimum interval size for CSI indices to 2^INT [14]
-b, --bai Generate BAI-format index for BAM files [default]
-c, --csi Generate CSI-format index for BAM files
-m, --min-shift INT Set minimum interval size for CSI indices to 2^INT [14]
-M Interpret all filename arguments as files to be indexed
-o, --output FILE Write index to FILE [alternative to <out.index> in args]
-@, --threads INT Sets the number of threads [none]
11 changes: 8 additions & 3 deletions src/samtools/index/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,15 @@
## VIASH END

set -e

# Build and run the `samtools index` command line from the par_* variables.
#
# Each flag variable is compared against the literal string "false" and unset
# when it matches, so the ${var:+...} expansions below contribute nothing for
# a flag that is switched off.
# NOTE(review): par_output_csi looks like the pre-rename name of par_csi;
# confirm whether it can be dropped.
[[ "$par_output_csi" == "false" ]] && unset par_output_csi
[[ "$par_multiple" == "false" ]] && unset par_multiple
[[ "$par_bai" == "false" ]] && unset par_bai
[[ "$par_csi" == "false" ]] && unset par_csi
[[ "$par_multiple" == "true" ]] && par_multiple="--multiple"

# The ${var:+...} expansions are deliberately left unquoted: an unset or empty
# variable must expand to zero words rather than to an empty argument.
# The -m value has to be the *value* of par_min_shift; the inner quotes keep
# it as a single word next to the -m flag.
samtools index \
  "$par_input" \
  ${par_output_csi:+-c} \
  ${par_csi:+-c} \
  ${par_bai:+-b} \
  ${par_min_shift:+-m "$par_min_shift"} \
  ${par_multiple:+-M} \
  -o "$par_output"
83 changes: 73 additions & 10 deletions src/samtools/index/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,86 @@ echo ">>> Testing $meta_functionality_name"

echo ">>> Generating BAM index"
"$meta_executable" \
--input "$test_dir/chr19.bam" \
--bam_csi_index false \
--output_bai "$test_dir/chr19.bam.bai"
--input "$test_dir/a.sorted.bam" \
--bai \
--output "$test_dir/a.sorted.bam.bai"

echo ">>> Check whether output exists"
[ ! -f chr19.bam.bai ] && echo "File 'mapt.NA12156.altex.bam.bai' does not exist!" && exit 1
[ ! -s chr19.bam.bai ] && echo "File 'mapt.NA12156.altex.bam.bai' is empty!" && exit 1
[ ! -f "$test_dir/a.sorted.bam.bai" ] && echo "File 'a.sorted.bam.bai' does not exist!" && exit 1

echo ">>> Check whether output is empty"
[ ! -s "$test_dir/a.sorted.bam.bai" ] && echo "File 'a.sorted.bam.bai' is empty!" && exit 1

echo ">>> Check whether output is correct"
# Compare the generated BAI index with the reference copy.
# A brace group is used instead of a subshell: `exit` inside ( ... ) only
# leaves the subshell, so the failure would not stop this script on its own.
diff "$test_dir/a.sorted.bam.bai" "$test_dir/a_ref.sorted.bam.bai" || {
  echo "File 'a.sorted.bam.bai' does not match expected output."
  exit 1
}

rm "$test_dir/a.sorted.bam.bai"

#################################################################################################

echo ">>> Generating CSI index"
"$meta_executable" \
--input "$test_dir/chr19.bam" \
--bam_csi_index true \
--output_csi "$test_dir/chr19.bam.csi"
--input "$test_dir/a.sorted.bam" \
--csi \
--output "$test_dir/a.sorted.bam.csi"

echo ">>> Check whether output exists"
[ ! -f "$test_dir/a.sorted.bam.csi" ] && echo "File 'a.sorted.bam.csi' does not exist!" && exit 1

echo ">>> Check whether output is empty"
[ ! -s "$test_dir/a.sorted.bam.csi" ] && echo "File 'a.sorted.bam.csi' is empty!" && exit 1

echo ">>> Check whether output is correct"
# Compare the generated CSI index with the reference copy.
# A brace group is used instead of a subshell: `exit` inside ( ... ) only
# leaves the subshell, so the failure would not stop this script on its own.
diff "$test_dir/a.sorted.bam.csi" "$test_dir/a_ref.sorted.bam.csi" || {
  echo "File 'a.sorted.bam.csi' does not match expected output."
  exit 1
}

rm "$test_dir/a.sorted.bam.csi"

#################################################################################################

echo ">>> Generating bam index with -M option"
"$meta_executable" \
--input "$test_dir/a.sorted.bam" \
--bai \
--output "$test_dir/a_multiple.sorted.bam.bai" \
--multiple

echo ">>> Check whether output exists"
[ ! -f "$test_dir/a_multiple.sorted.bam.bai" ] && echo "File 'a_multiple.sorted.bam.bai' does not exist!" && exit 1

echo ">>> Check whether output is empty"
[ ! -s "$test_dir/a_multiple.sorted.bam.bai" ] && echo "File 'a_multiple.sorted.bam.bai' is empty!" && exit 1

echo ">>> Check whether output is correct"
# Compare the index produced with --multiple against the reference copy.
# A brace group is used instead of a subshell: `exit` inside ( ... ) only
# leaves the subshell, so the failure would not stop this script on its own.
diff "$test_dir/a_multiple.sorted.bam.bai" "$test_dir/a_multiple_ref.sorted.bam.bai" || {
  echo "File 'a_multiple.sorted.bam.bai' does not match expected output."
  exit 1
}

# Remove the generated index, as the other test sections do.
rm "$test_dir/a_multiple.sorted.bam.bai"


#################################################################################################

echo ">>> Generating BAM index with -m option"

"$meta_executable" \
--input "$test_dir/a.sorted.bam" \
--min_shift 4 \
--bai \
--output "$test_dir/a_4.sorted.bam.bai"

echo ">>> Check whether output exists"
[ ! -f "chr19.bam.csi" ] && echo "File 'mapt.NA12156.altex.bam.csi' does not exist!" && exit 1
[ ! -s "chr19.bam.csi" ] && echo "File 'mapt.NA12156.altex.bam.csi' is empty!" && exit 1
[ ! -f "$test_dir/a_4.sorted.bam.bai" ] && echo "File 'a_4.sorted.bam.bai' does not exist!" && exit 1

echo ">>> Check whether output is empty"
[ ! -s "$test_dir/a_4.sorted.bam.bai" ] && echo "File 'a_4.sorted.bam.bai' is empty!" && exit 1

echo ">>> Check whether output is correct"
# Compare the index produced with --min_shift 4 against the reference copy.
# A brace group is used instead of a subshell: `exit` inside ( ... ) only
# leaves the subshell, so the failure would not stop this script on its own.
diff "$test_dir/a_4.sorted.bam.bai" "$test_dir/a_4_ref.sorted.bam.bai" || {
  echo "File 'a_4.sorted.bam.bai' does not match expected output."
  exit 1
}

rm "$test_dir/a_4.sorted.bam.bai"

#################################################################################################


echo "All tests succeeded!"
exit 0
Binary file added src/samtools/index/test_data/a_4_ref.sorted.bam.bai
Binary file not shown.
Binary file not shown.
8 changes: 3 additions & 5 deletions src/samtools/index/test_data/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ if [ ! -d /tmp/idxstats_source ]; then
fi

cp -r /tmp/idxstats_source/bio/samtools/idxstats/test/mapped/* src/samtools/idxstats/test_data
# samtools idxstats a.sorted.bam > a.sorted.idxstats
# samtools index a_ref.sorted.bam -o a_ref.sorted.bam.bai
# samtools index a_ref.sorted.bam -c a_ref.sorted.bam.csi


# dowload test data from nf-core module
wget https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam
wget https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai
# samtools idxstats test.paired_end.sorted.bam > test_ref.paired_end.sorted.idxstats
Binary file not shown.

0 comments on commit 0aecaa3

Please sign in to comment.