From 0aecaa35a86bfc9eaf7db3b41f8c0edaa654febc Mon Sep 17 00:00:00 2001 From: emmarousseau Date: Fri, 29 Mar 2024 20:38:41 +0100 Subject: [PATCH] add more tests to samtools/index --- CHANGELOG.md | 3 + src/samtools/index/config.vsh.yaml | 55 ++++++------ src/samtools/index/help.txt | 12 ++- src/samtools/index/script.sh | 11 ++- src/samtools/index/test.sh | 83 +++++++++++++++--- .../index/test_data/a_4_ref.sorted.bam.bai | Bin 0 -> 96 bytes .../test_data/a_multiple_ref.sorted.bam.bai | Bin 0 -> 96 bytes ...re.sorted.bam.csi => a_ref.sorted.bam.csi} | Bin src/samtools/index/test_data/script.sh | 8 +- .../test_data/test.paired_end.sorted.bam | Bin 88 -> 0 bytes 10 files changed, 122 insertions(+), 50 deletions(-) create mode 100644 src/samtools/index/test_data/a_4_ref.sorted.bam.bai create mode 100644 src/samtools/index/test_data/a_multiple_ref.sorted.bam.bai rename src/samtools/index/test_data/{a_re.sorted.bam.csi => a_ref.sorted.bam.csi} (100%) delete mode 100644 src/samtools/index/test_data/test.paired_end.sorted.bam diff --git a/CHANGELOG.md b/CHANGELOG.md index c3f22e96..dde87957 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,9 @@ - `salmon/salmon_index`: Create a salmon index for the transcriptome to use Salmon in the mapping-based mode (PR #24). - `salmon/salmon_quant`: Transcript quantification from RNA-seq data (PR #24). +* `samtools`: + - `samtools/index`: Index SAM/BAM/CRAM files. 
+ ## MAJOR CHANGES ## MINOR CHANGES diff --git a/src/samtools/index/config.vsh.yaml b/src/samtools/index/config.vsh.yaml index 22c5e89c..07d81728 100644 --- a/src/samtools/index/config.vsh.yaml +++ b/src/samtools/index/config.vsh.yaml @@ -1,7 +1,7 @@ name: samtools_index namespace: samtools -description: Index BAM files -keywords: [stats, mapping, counts, chromosome, bam, sam, cram] +description: Index SAM/BAM/CRAM files +keywords: [index, bam, sam, cram] links: homepage: https://www.htslib.org/ documentation: https://www.htslib.org/doc/samtools-index.html @@ -13,47 +13,46 @@ license: MIT/Expat argument_groups: - name: Inputs arguments: - - name: --bam - alternatives: -b + - name: --input type: file - description: BAM input file. - - name: "--bam_csi_index" - type: boolean - description: Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes. + description: Input file name + required: true + must_exist: true - name: Outputs arguments: - - name: --output_bai - alternatives: [ -o, --output ] + - name: --output + alternatives: -o type: file - description: BAM index - direction: output + description: Output file name required: true must_exist: false example: out.bam.bai - must_exist: false - - name: --output_csi - alternatives: [ -c, --csi ] - type: file - description: CSI index - direction: output - default: out.bam.csi - - name: --min-shift - alternatives: -m - type: integer - description: Create a CSI index, with a minimum interval size of 2^INT. - default: 0 - name: Options arguments: - - name: --individual - alternatives: -M + - name: --bai + alternatives: -b + type: boolean_true + description: Generate BAM index + - name: --csi + alternatives: -c + type: boolean_true + description: | + Create a CSI index for BAM files instead of the traditional BAI + index. This will be required for genomes with larger chromosome + sizes. 
+ - name: --min_shift + alternatives: -m + type: integer + description: | + Create a CSI index, with a minimum interval size of 2^INT. + - name: --multiple + alternatives: -M + type: boolean_true description: | Interpret all filename arguments as alignment files to be indexed individually. (Without -M, filename arguments are interpreted solely as per the second synopsis.) - - resources: - type: bash_script path: script.sh diff --git a/src/samtools/index/help.txt b/src/samtools/index/help.txt index 52026e0d..fdf0d12d 100644 --- a/src/samtools/index/help.txt +++ b/src/samtools/index/help.txt @@ -2,8 +2,12 @@ samtools index ``` -Usage: samtools index [-bc] [-m INT] [out.index] +Usage: samtools index -M [-bc] [-m INT] ... + or: samtools index [-bc] [-m INT] [out.index] Options: - -b Generate BAI-format index for BAM files [default] - -c Generate CSI-format index for BAM files - -m INT Set minimum interval size for CSI indices to 2^INT [14] \ No newline at end of file + -b, --bai Generate BAI-format index for BAM files [default] + -c, --csi Generate CSI-format index for BAM files + -m, --min-shift INT Set minimum interval size for CSI indices to 2^INT [14] + -M Interpret all filename arguments as files to be indexed + -o, --output FILE Write index to FILE [alternative to in args] + -@, --threads INT Sets the number of threads [none] \ No newline at end of file diff --git a/src/samtools/index/script.sh b/src/samtools/index/script.sh index a74c7872..b50131e6 100644 --- a/src/samtools/index/script.sh +++ b/src/samtools/index/script.sh @@ -4,10 +4,15 @@ ## VIASH END set -e - -[[ "$par_output_csi" == "false" ]] && unset par_output_csi +[[ "$par_multiple" == "false" ]] && unset par_multiple +[[ "$par_bai" == "false" ]] && unset par_bai +[[ "$par_csi" == "false" ]] && unset par_csi +[[ "$par_multiple" == "true" ]] && par_multiple="--multiple" samtools index \ "$par_input" \ - ${par_output_csi:+-c} \ + ${par_csi:+-c} \ + ${par_bai:+-b} \ + ${par_min_shift:+-m "$par_min_shift"} \
+ ${par_multiple:+-M} \ -o "$par_output" \ No newline at end of file diff --git a/src/samtools/index/test.sh b/src/samtools/index/test.sh index e4ad61c5..f77fb018 100644 --- a/src/samtools/index/test.sh +++ b/src/samtools/index/test.sh @@ -5,23 +5,86 @@ echo ">>> Testing $meta_functionality_name" echo ">>> Generating BAM index" "$meta_executable" \ - --input "$test_dir/chr19.bam" \ - --bam_csi_index false \ - --output_bai "$test_dir/chr19.bam.bai" + --input "$test_dir/a.sorted.bam" \ + --bai \ + --output "$test_dir/a.sorted.bam.bai" echo ">>> Check whether output exists" -[ ! -f chr19.bam.bai ] && echo "File 'mapt.NA12156.altex.bam.bai' does not exist!" && exit 1 -[ ! -s chr19.bam.bai ] && echo "File 'mapt.NA12156.altex.bam.bai' is empty!" && exit 1 +[ ! -f "$test_dir/a.sorted.bam.bai" ] && echo "File 'a.sorted.bam.bai' does not exist!" && exit 1 + +echo ">>> Check whether output is empty" +[ ! -s "$test_dir/a.sorted.bam.bai" ] && echo "File 'a.sorted.bam.bai' is empty!" && exit 1 + +echo ">>> Check whether output is correct" +diff "$test_dir/a.sorted.bam.bai" "$test_dir/a_ref.sorted.bam.bai" || \ + { echo "File 'a.sorted.bam.bai' does not match expected output."; exit 1; } + +rm "$test_dir/a.sorted.bam.bai" + +################################################################################################# echo ">>> Generating CSI index" "$meta_executable" \ - --input "$test_dir/chr19.bam" \ - --bam_csi_index true \ - --output_csi "$test_dir/chr19.bam.csi" + --input "$test_dir/a.sorted.bam" \ + --csi \ + --output "$test_dir/a.sorted.bam.csi" + +echo ">>> Check whether output exists" +[ ! -f "$test_dir/a.sorted.bam.csi" ] && echo "File 'a.sorted.bam.csi' does not exist!" && exit 1 + +echo ">>> Check whether output is empty" +[ ! -s "$test_dir/a.sorted.bam.csi" ] && echo "File 'a.sorted.bam.csi' is empty!"
&& exit 1 + +echo ">>> Check whether output is correct" +diff "$test_dir/a.sorted.bam.csi" "$test_dir/a_ref.sorted.bam.csi" || \ + { echo "File 'a.sorted.bam.csi' does not match expected output."; exit 1; } + +rm "$test_dir/a.sorted.bam.csi" + +################################################################################################# + +echo ">>> Generating bam index with -M option" +"$meta_executable" \ + --input "$test_dir/a.sorted.bam" \ + --bai \ + --output "$test_dir/a_multiple.sorted.bam.bai" \ + --multiple + +echo ">>> Check whether output exists" +[ ! -f "$test_dir/a_multiple.sorted.bam.bai" ] && echo "File 'a_multiple.sorted.bam.bai' does not exist!" && exit 1 + +echo ">>> Check whether output is empty" +[ ! -s "$test_dir/a_multiple.sorted.bam.bai" ] && echo "File 'a_multiple.sorted.bam.bai' is empty!" && exit 1 + +echo ">>> Check whether output is correct" +diff "$test_dir/a_multiple.sorted.bam.bai" "$test_dir/a_multiple_ref.sorted.bam.bai" || \ + { echo "File 'a_multiple.sorted.bam.bai' does not match expected output."; exit 1; } + + +################################################################################################# + +echo ">>> Generating BAM index with -m option" + +"$meta_executable" \ + --input "$test_dir/a.sorted.bam" \ + --min_shift 4 \ + --bai \ + --output "$test_dir/a_4.sorted.bam.bai" echo ">>> Check whether output exists" -[ ! -f "chr19.bam.csi" ] && echo "File 'mapt.NA12156.altex.bam.csi' does not exist!" && exit 1 -[ ! -s "chr19.bam.csi" ] && echo "File 'mapt.NA12156.altex.bam.csi' is empty!" && exit 1 +[ ! -f "$test_dir/a_4.sorted.bam.bai" ] && echo "File 'a_4.sorted.bam.bai' does not exist!" && exit 1 + +echo ">>> Check whether output is empty" +[ ! -s "$test_dir/a_4.sorted.bam.bai" ] && echo "File 'a_4.sorted.bam.bai' is empty!"
&& exit 1 + +echo ">>> Check whether output is correct" +diff "$test_dir/a_4.sorted.bam.bai" "$test_dir/a_4_ref.sorted.bam.bai" || \ + { echo "File 'a_4.sorted.bam.bai' does not match expected output."; exit 1; } + +rm "$test_dir/a_4.sorted.bam.bai" + +################################################################################################# + echo "All tests succeeded!" exit 0 \ No newline at end of file diff --git a/src/samtools/index/test_data/a_4_ref.sorted.bam.bai b/src/samtools/index/test_data/a_4_ref.sorted.bam.bai new file mode 100644 index 0000000000000000000000000000000000000000..4f08f5d5e5e01de5042cb8f625c1d26b338c3394 GIT binary patch literal 96 vcmZ>A^kigYU|?VZVoxCk1`wNpAp%S?Fl+_WUXvj5=;CZpRWJ%wFGLgo8^r{K literal 0 HcmV?d00001 diff --git a/src/samtools/index/test_data/a_multiple_ref.sorted.bam.bai b/src/samtools/index/test_data/a_multiple_ref.sorted.bam.bai new file mode 100644 index 0000000000000000000000000000000000000000..4f08f5d5e5e01de5042cb8f625c1d26b338c3394 GIT binary patch literal 96 vcmZ>A^kigYU|?VZVoxCk1`wNpAp%S?Fl+_WUXvj5=;CZpRWJ%wFGLgo8^r{K literal 0 HcmV?d00001 diff --git a/src/samtools/index/test_data/a_re.sorted.bam.csi b/src/samtools/index/test_data/a_ref.sorted.bam.csi similarity index 100% rename from src/samtools/index/test_data/a_re.sorted.bam.csi rename to src/samtools/index/test_data/a_ref.sorted.bam.csi diff --git a/src/samtools/index/test_data/script.sh b/src/samtools/index/test_data/script.sh index 0e28a4c6..ee86e514 100755 --- a/src/samtools/index/test_data/script.sh +++ b/src/samtools/index/test_data/script.sh @@ -6,9 +6,7 @@ if [ !
-d /tmp/idxstats_source ]; then fi cp -r /tmp/idxstats_source/bio/samtools/idxstats/test/mapped/* src/samtools/idxstats/test_data -# samtools idxstats a.sorted.bam > a.sorted.idxstats +# samtools index a_ref.sorted.bam -o a_ref.sorted.bam.bai +# samtools index a_ref.sorted.bam -c a_ref.sorted.bam.csi + -# dowload test data from nf-core module -wget https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam -wget https://github.com/nf-core/test-datasets/raw/modules/data/genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai -# samtools idxstats test.paired_end.sorted.bam > test_ref.paired_end.sorted.idxstats \ No newline at end of file diff --git a/src/samtools/index/test_data/test.paired_end.sorted.bam b/src/samtools/index/test_data/test.paired_end.sorted.bam deleted file mode 100644 index 181130e88d5c6971989fbdcb943d7c866be38b85..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 88 zcmb2|=3rp}f&Xj_PR>jW)(pjb-%_3=J21E@q&0n3dXQvb#(db-;AG2^cFt}aY34GZ V(yKvF3V>SV(ez3)FoTT(5db8l6x;v+