Skip to content

Commit

Permalink
Add tests and adjust script
Browse files Browse the repository at this point in the history
  • Loading branch information
emmarousseau committed Apr 18, 2024
1 parent 9b21fd4 commit e28272d
Show file tree
Hide file tree
Showing 11 changed files with 119 additions and 32 deletions.
22 changes: 12 additions & 10 deletions src/samtools/samtools_faidx/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ description: Indexes FASTA files to enable random access to fasta and fastq file
keywords: [ index, fasta, faidx ]
links:
homepage: https://www.htslib.org/
documentation: https://www.htslib.org/doc/faidx.html
documentation: https://www.htslib.org/doc/samtools-faidx.html
repository: https://github.com/samtools/samtools
references:
doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
Expand All @@ -13,27 +13,29 @@ license: MIT/Expat
argument_groups:
- name: Inputs
arguments:
- name: --fasta
- name: --input
type: file
description: |
FASTA input file.
- name: --length
alternatives: -n
type: integer
description: |
Length of FASTA sequence line.
Length for FASTA sequence line wrapping. If zero, this means do not
line wrap. Defaults to the line length in the input file.
default: 60
- name: --region_file
alternatives: -r
type: file
description: |
File of regions. Format is chr:from-to. One per line.
File of regions. Format is chr:from-to. One per line.
Must be used with --output to avoid sending output to stdout.
- name: Options
arguments:
- name: --continue
type: boolean_true
description: |
Continue after trying to retrieve missing region.
Continue working if a non-existent region is requested.
- name: --reverse_complement
alternatives: -i
type: boolean_true
Expand All @@ -45,9 +47,9 @@ argument_groups:
alternatives: -o
type: file
description: |
Write FASTA to file.
required: true
Write output to file.
direction: output
required: true
example: output.fasta
- name: --mark_strand
type: string
Expand All @@ -58,19 +60,19 @@ argument_groups:
- name: --fai_idx
type: file
description: |
Name of the index file (default file.fa.fai).
Read/Write to specified index file (default file.fa.fai).
direction: output
example: file.fa.fai
- name: --gzi_idx
type: file
description: |
Name of compressed file index (default file.fa.gz.gzi).
Read/Write to specified compressed file index (used with .gz files, default file.fa.gz.gzi).
direction: output
example: file.fa.gz.gzi
- name: --fastq
type: boolean_true
description: |
File and index in FASTQ format.
Read FASTQ files and output extracted sequences in FASTQ format. Same as using samtools fqidx.
resources:
- type: bash_script
Expand Down
10 changes: 5 additions & 5 deletions src/samtools/samtools_faidx/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ set -e
[[ "$par_fastq" == "false" ]] && unset par_fastq

samtools faidx \
"$par_fasta" \
-o "$par_output" \
${par_length:+-l "$par_length"} \
"$par_input" \
${par_output:+-o "$par_output"} \
${par_length:+-n "$par_length"} \
${par_continue:+-c} \
${part_region_file:+-r "$par_region_file"} \
${par_revferse_complement:+-r} \
${par_region_file:+-r "$par_region_file"} \
${par_reverse_complement:+-i} \
${par_mark_strand:+--mark-strand "$par_mark_strand"} \
${par_fai_idx:+--fai-idx "$par_fai_idx"} \
${par_gzi_idx:+--gzi-idx "$par_gzi_idx"} \
Expand Down
99 changes: 82 additions & 17 deletions src/samtools/samtools_faidx/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,101 @@ test_dir="${meta_resources_dir}/test_data"
echo ">>> Testing $meta_functionality_name"

"$meta_executable" \
"$test_dir/test.fasta" \
--output "$test_dir/test.fasta.fai" \
--continue \
--fai-idx
--input "$test_dir/test.fasta" \
--output "$test_dir/test.fasta.fai"

echo "$meta_executable"
echo "$test_dir/test.fasta"

echo ">>> Checking whether output exists"
[ ! -f "$test_dir/test.fasta.fai" ] && echo "File 'test.fasta.fai' does not exist!" && exit 1
[ ! -f "$test_dir/test.fasta.fai.idx" ] && echo "File 'test.fasta.fai.idx' does not exist!" && exit 1
[ ! -f "$test_dir/test.fasta.gzi.idx" ] && echo "File 'test.fasta.gzi.idx' does not exist!" && exit 1

echo ">>> Checking whether output is non-empty"
[ ! -s "$test_dir/test.fasta.fai" ] && echo "File 'test.fasta.fai' is empty!" && exit 1
[ ! -s "$test_dir/test.fasta.fai.idx" ] && echo "File 'test.fasta.fai.idx' is empty!" && exit 1
[ ! -s "$test_dir/test.fasta.gzi.idx" ] && echo "File 'test.fasta.gzi.idx' is empty!" && exit 1

echo ">>> Checking whether output is correct"
diff "$test_dir/a.flagstat" "$test_dir/a_ref.flagstat" || \
(echo "Output file a.flagstat does not match expected output" && exit 1)
# Compare the generated index with the stored reference. The failure branch is
# grouped with braces rather than parentheses: `exit 1` inside ( ... ) would
# only leave the subshell and the test script would carry on.
diff "$test_dir/test.fasta.fai" "$test_dir/output/test.fasta.fai" || \
{ echo "Output file test.fasta.fai does not match expected output"; exit 1; }

rm "$test_dir/test.fasta.fai"

####################################################################################################

echo ">>> Test 2: ${meta_functionality_name} with bgzipped input"

# Run the component on the bgzipped FASTA; the checks below expect both a
# .fai output and a .gzi index next to the input.
"$meta_executable" \
--input "$test_dir/test.fasta.gz" \
--output "$test_dir/test.fasta.gz.fai"

echo ">>> Checking whether output exists"
[ ! -f "$test_dir/test.fasta.gz.fai" ] && echo "File 'test.fasta.gz.fai' does not exist!" && exit 1
[ ! -f "$test_dir/test.fasta.gz.gzi" ] && echo "File 'test.fasta.gz.gzi' does not exist!" && exit 1

echo ">>> Checking whether output is non-empty"
[ ! -s "$test_dir/test.fasta.gz.fai" ] && echo "File 'test.fasta.gz.fai' is empty!" && exit 1
[ ! -s "$test_dir/test.fasta.gz.gzi" ] && echo "File 'test.fasta.gz.gzi' is empty!" && exit 1

echo ">>> Checking whether output is correct"
# Failure branches use { ...; } instead of ( ... ): `exit 1` inside a subshell
# only terminates the subshell, so a mismatch would not stop the test script.
diff "$test_dir/test.fasta.gz.fai" "$test_dir/output/test.fasta.gz.fai" || \
{ echo "Output file test.fasta.gz.fai does not match expected output"; exit 1; }
diff "$test_dir/test.fasta.gz.gzi" "$test_dir/output/test.fasta.gz.gzi" || \
{ echo "Output file test.fasta.gz.gzi does not match expected output"; exit 1; }

# Remove the generated files from the test data directory.
rm "$test_dir/test.fasta.gz.fai"
rm "$test_dir/test.fasta.gz.gzi"

####################################################################################################

echo ">>> Test 3: ${meta_functionality_name} with fastq input"

# Run the component on a FASTQ file and compare the resulting index with the
# stored reference in test_data/output.
"$meta_executable" \
--input "$test_dir/test.fastq" \
--output "$test_dir/test.fastq.fai"

echo ">>> Checking whether output exists"
[ ! -f "$test_dir/test.fastq.fai" ] && echo "File 'test.fastq.fai' does not exist!" && exit 1

echo ">>> Checking whether output is non-empty"
[ ! -s "$test_dir/test.fastq.fai" ] && echo "File 'test.fastq.fai' is empty!" && exit 1

echo ">>> Checking whether output is correct"
# The failure branch uses { ...; } instead of ( ... ): `exit 1` inside a
# subshell only terminates the subshell, so a mismatch would go unnoticed.
diff "$test_dir/test.fastq.fai" "$test_dir/output/test.fastq.fai" || \
{ echo "Output file test.fastq.fai does not match expected output"; exit 1; }

# Remove the generated index from the test data directory.
rm "$test_dir/test.fastq.fai"

rm "$test_dir/a.flagstat"
####################################################################################################

echo ">>> Test 2:"
echo ">>> Test 4: ${meta_functionality_name} with region file containing non-existent regions and
specific fasta line wrap length"

"$meta_executable" \
"$test_dir/test.fasta" \
--output "$test_dir/test.fasta.fai" \
--length 60 \
--input "$test_dir/test.fasta" \
--output "$test_dir/regions.fasta" \
--length 10 \
--continue \
--gzi-idx "$test_dir/test.fasta.gz.gzi" \
--region_file "$test_dir/test.regions" \
--fai_idx "$test_dir/regions.fasta.fai"

echo ">>> Checking whether output exists"
[ ! -f "$test_dir/regions.fasta" ] && echo "File 'regions.fasta' does not exist!" && exit 1
[ ! -f "$test_dir/regions.fasta.fai" ] && echo "File 'regions.fasta.fai' does not exist!" && exit 1

echo ">>> Checking whether output is non-empty"
[ ! -s "$test_dir/regions.fasta" ] && echo "File 'regions.fasta' is empty!" && exit 1
[ ! -s "$test_dir/regions.fasta.fai" ] && echo "File 'regions.fasta.fai' is empty!" && exit 1

echo ">>> Checking whether output is correct"
# Failure branches use { ...; } instead of ( ... ): `exit 1` inside a subshell
# only terminates the subshell, so a mismatch would not stop the test script.
diff "$test_dir/regions.fasta" "$test_dir/output/regions.fasta" || \
{ echo "Output file regions.fasta does not match expected output"; exit 1; }
diff "$test_dir/regions.fasta.fai" "$test_dir/output/regions.fasta.fai" || \
{ echo "Output file regions.fasta.fai does not match expected output"; exit 1; }

# Remove the generated files from the test data directory.
rm "$test_dir/regions.fasta"
rm "$test_dir/regions.fasta.fai"

####################################################################################################

echo "All tests succeeded!"
exit 0
exit 0

14 changes: 14 additions & 0 deletions src/samtools/samtools_faidx/test_data/output/regions.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
>YAL069W:300-315
CCCAAATATT
GTATAA
>YAL068C:200-230
CTGAAGCCGT
TTTCAACTAC
GGTGACTTCA
C
>YAL067W-A:115-145
GCTTATTGTC
TAAGCCTGAA
TTCAGTCTGC
T
>chr1:1-100
Binary file not shown.
2 changes: 2 additions & 0 deletions src/samtools/samtools_faidx/test_data/output/test.fastq.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
fastq1 66 8 30 31 79
fastq2 28 156 14 15 188
Binary file added src/samtools/samtools_faidx/test_data/test.fasta.gz
Binary file not shown.
4 changes: 4 additions & 0 deletions src/samtools/samtools_faidx/test_data/test.regions
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
YAL069W:300-315
YAL068C:200-230
YAL067W-A:115-145
chr1:1-100

0 comments on commit e28272d

Please sign in to comment.