Skip to content

Commit

Permalink
Update changelog, add tests, fix argument naming errors, add test data
Browse files Browse the repository at this point in the history
  • Loading branch information
emmarousseau committed May 10, 2024
1 parent cd118b7 commit 790a466
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 20 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
- `samtools/samtools_index`: Index SAM/BAM/CRAM files (PR #35).
- `samtools/samtools_sort`: Sort SAM/BAM/CRAM files (PR #36).
- `samtools/samtools_stats`: Reports alignment summary statistics for a BAM file (PR #39).
- `samtools/samtools_stats`: Indexes FASTA files to enable random access to fasta and fastq files (PR #41).
- `samtools/samtools_faidx`: Indexes FASTA files to enable random access to fasta and fastq files (PR #41).
- `samtools/samtools_fastq`: Converts a SAM/BAM/CRAM file to FASTQ (PR #49).

* `falco`: A C++ drop-in replacement of FastQC to assess the quality of sequence read data (PR #43).

Expand Down
19 changes: 12 additions & 7 deletions src/samtools/samtools_fastq/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: samtools_fastq
namespace: samtools
description: convert a SAM/BAM/CRAM file to FASTQ.
description: Converts a SAM, BAM or CRAM to FASTQ format.
keywords: [fastq, bam, sam, cram]
links:
homepage: https://www.htslib.org/
Expand All @@ -23,6 +23,7 @@ argument_groups:
type: file
description: output FASTQ file
required: true
direction: output
- name: Options
arguments:
- name: --no_suffix
Expand Down Expand Up @@ -58,29 +59,33 @@ argument_groups:
TAGLIST can be blank or * to indicate all tags should be copied to the output. If using *,
be careful to quote it to avoid unwanted shell expansion.
- name: --read1
alternatives: "-1"
alternatives: -1
type: file
description: |
Write reads with the READ1 FLAG set (and READ2 not set) to FILE instead of outputting them.
If the -s option is used, only paired reads will be written to this file.
direction: output
- name: --read2
alternatives: "-2"
alternatives: -2
type: file
description: |
Write reads with the READ2 FLAG set (and READ1 not set) to FILE instead of outputting them.
If the -s option is used, only paired reads will be written to this file.
direction: output
- name: --output_reads
alternatives: -o
type: file
description: |
Write reads with either READ1 FLAG or READ2 flag set to FILE instead of outputting them to stdout.
This is equivalent to -1 FILE -2 FILE.
direction: output
- name: --output_reads_both
alternatives: -0
type: file
description: |
Write reads whose READ1 and READ2 FLAG bits are either both set or both unset to FILE
instead of outputting them.
direction: output
- name: --filter_flags
alternatives: -f
type: integer
Expand All @@ -90,17 +95,17 @@ argument_groups:
(i.e. /^0[0-7]+/).
default: 0
- name: --excl_flags
alternatives: "-F"
type: integer
alternatives: -F
type: string
description: |
Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified
in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0'
(i.e. /^0[0-7]+/). This defaults to 0x900 representing filtering of secondary and
supplementary alignments.
default: 0x900
- name: --incl_flags
alternatives: "--rf"
type: integer
alternatives: --rf
type: string
description: |
Only output alignments with any bits set in INT present in the FLAG field. INT can be specified
in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/), in octal by beginning with `0'
Expand Down
82 changes: 71 additions & 11 deletions src/samtools/samtools_fastq/test.sh
Original file line number Diff line number Diff line change
@@ -1,36 +1,96 @@
#!/bin/bash

test_dir="${meta_resources_dir}/test_data"
out_dir="${meta_resources_dir}/tmp"
out_dir="${meta_resources_dir}/out_data"

############################################################################################

## example 1: samtools fastq -0 /dev/null in_name.bam > all_reads.fq
## example 2: samtools fastq -0 /dev/null -s single.fq -N in_name.bam > paired.fq
## example 3: samtools fastq with fasta output??
## example 4: samtools fastq with compressed input?
## example 5: samtools fastq with no suffix?
# Test 1: convert a BAM file to FASTQ and compare it with the reference file.
echo ">>> Test 1: Convert all reads from a bam file to fastq format"
"$meta_executable" \
  --input "$test_dir/a.bam" \
  --output "$out_dir/a.fq"

echo ">>> Check if output file exists"
[ ! -f "$out_dir/a.fq" ] && echo "Output file a.fq does not exist" && exit 1

echo ">>> Check if output is empty"
[ ! -s "$out_dir/a.fq" ] && echo "Output file a.fq is empty" && exit 1

echo ">>> Check if output matches expected output"
# Use a brace group rather than a subshell: `exit 1` inside ( ... ) only
# terminates the subshell, so a mismatch would not stop the test script.
diff "$out_dir/a.fq" "$test_dir/a.fq" ||
  { echo "Output file a.fq does not match expected output"; exit 1; }

echo ">>> Test 1: Sorting a BAM file"
rm "$out_dir/a.fq"

############################################################################################

echo ">>> Test 2: Convert all reads from a sam file to fastq format"
"$meta_executable" \
--input "$test_dir/a.bam" \
--output "$test_dir/a.sorted.bam"
--input "$test_dir/a.sam" \
--output "$out_dir/a.fq"

echo ">>> Check if output file exists"
[ ] \
&& echo "Output file a.sorted.bam does not exist" && exit 1
# The converted file must exist ...
[ ! -f "$out_dir/a.fq" ] && echo "Output file a.fq does not exist" && exit 1

echo ">>> Check if output is empty"
# ... must contain data ...
[ ! -s "$out_dir/a.fq" ] && echo "Output file a.fq is empty" && exit 1

echo ">>> Check if output matches expected output"
# ... and must be identical to the reference FASTQ. A brace group is used
# because `exit 1` inside ( ... ) would only leave the subshell and the
# script would keep running after a mismatch.
diff "$out_dir/a.fq" "$test_dir/a.fq" ||
  { echo "Output file a.fq does not match expected output"; exit 1; }

# Remove the output so it cannot leak into a later check.
rm "$out_dir/a.fq"

############################################################################################

# Test 3: write READ1 and READ2 reads to separate files via --read1/--read2.
echo ">>> Test 3: Output reads from bam file to separate files"

"$meta_executable" \
  --input "$test_dir/a.bam" \
  --read1 "$out_dir/a.1.fq" \
  --read2 "$out_dir/a.2.fq" \
  --output "$out_dir/a.fq"

echo ">>> Check if output files exist"
[ ! -f "$out_dir/a.1.fq" ] && echo "Output file a.1.fq does not exist" && exit 1
[ ! -f "$out_dir/a.2.fq" ] && echo "Output file a.2.fq does not exist" && exit 1
[ ! -f "$out_dir/a.fq" ] && echo "Output file a.fq does not exist" && exit 1

echo ">>> Check if output files are empty"
[ ! -s "$out_dir/a.1.fq" ] && echo "Output file a.1.fq is empty" && exit 1
[ ! -s "$out_dir/a.2.fq" ] && echo "Output file a.2.fq is empty" && exit 1
# a.fq is deliberately not checked for content: the input has no singleton
# reads, so this file is expected to be empty.

echo ">>> Check if output files match expected output"
# Brace groups instead of subshells: `exit 1` inside ( ... ) only ends the
# subshell, so a mismatch would otherwise go unnoticed by the script.
diff "$out_dir/a.1.fq" "$test_dir/a.1.fq" ||
  { echo "Output file a.1.fq does not match expected output"; exit 1; }
diff "$out_dir/a.2.fq" "$test_dir/a.2.fq" ||
  { echo "Output file a.2.fq does not match expected output"; exit 1; }

rm "$out_dir/a.1.fq" "$out_dir/a.2.fq" "$out_dir/a.fq"

############################################################################################

# Test 4: exclude alignments with FLAG bit 0x80 set and compare the result
# with the reference file half.fq. The input for this test is the SAM file.
echo ">>> Test 4: Output only forward reads from sam file to fastq format"

"$meta_executable" \
  --input "$test_dir/a.sam" \
  --excl_flags "0x80" \
  --output "$out_dir/half.fq"

echo ">>> Check if output file exists"
[ ! -f "$out_dir/half.fq" ] && echo "Output file half.fq does not exist" && exit 1

echo ">>> Check if output is empty"
[ ! -s "$out_dir/half.fq" ] && echo "Output file half.fq is empty" && exit 1

echo ">>> Check if output matches expected output"
# Brace group instead of a subshell: `exit 1` inside ( ... ) only ends the
# subshell, so the script would continue after a mismatch.
diff "$out_dir/half.fq" "$test_dir/half.fq" ||
  { echo "Output file half.fq does not match expected output"; exit 1; }

rm "$out_dir/half.fq"

############################################################################################

echo "All tests succeeded!"
exit 0
12 changes: 12 additions & 0 deletions src/samtools/samtools_fastq/test_data/a.1.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@a1
AAAAAAAAAA
+
**********
@b1
AAAAAAAAAA
+
**********
@c1
AAAAAAAAAA
+
**********
12 changes: 12 additions & 0 deletions src/samtools/samtools_fastq/test_data/a.2.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@a1
AAAAAAAAAA
+
**********
@b1
AAAAAAAAAA
+
**********
@c1
AAAAAAAAAA
+
**********
Binary file added src/samtools/samtools_fastq/test_data/a.bam
Binary file not shown.
24 changes: 24 additions & 0 deletions src/samtools/samtools_fastq/test_data/a.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
@a1/1
AAAAAAAAAA
+
**********
@b1/1
AAAAAAAAAA
+
**********
@c1/1
AAAAAAAAAA
+
**********
@a1/2
AAAAAAAAAA
+
**********
@b1/2
AAAAAAAAAA
+
**********
@c1/2
AAAAAAAAAA
+
**********
12 changes: 12 additions & 0 deletions src/samtools/samtools_fastq/test_data/half.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@a1/1
AAAAAAAAAA
+
**********
@b1/1
AAAAAAAAAA
+
**********
@c1/1
AAAAAAAAAA
+
**********
5 changes: 4 additions & 1 deletion src/samtools/samtools_fastq/test_data/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@ if [ ! -d /tmp/fastq_source ]; then
git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers.git /tmp/fastq_source
fi

cp -r /tmp/fastq_source/bio/samtools/fastx/test/*.sam src/samtools/samtools_fastq/test_data/
cp -r /tmp/fastq_source/bio/samtools/fastx/test/*.sam src/samtools/samtools_fastq/test_data/
cp -r /tmp/fastq_source/bio/samtools/fastq/interleaved/test/mapped/*.bam src/samtools/samtools_fastq/test_data/
cp -r /tmp/fastq_source/bio/samtools/fastq/interleaved/test/reads/*.fq src/samtools/samtools_fastq/test_data/
cp -r /tmp/fastq_source/bio/samtools/fastq/separate/test/reads/*.fq src/samtools/samtools_fastq/test_data/

0 comments on commit 790a466

Please sign in to comment.