# Patch notes: corrects the samtools_faidx CHANGELOG entry and adds one for samtools_fastq; marks the FASTQ file arguments in config.vsh.yaml with `direction: output` and types --excl_flags/--incl_flags as string; replaces the placeholder test.sh with four conversion tests and adds their fixture files.
# test.sh reports a failed `diff` with `{ echo ...; exit 1; }` on purpose: inside a `( ... )` subshell the `exit 1` would end only the subshell, and the script would carry on to print "All tests succeeded!" and exit 0.
diff --git a/CHANGELOG.md b/CHANGELOG.md index c8d0a08b..33464879 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,7 +43,8 @@ - `samtools/samtools_index`: Index SAM/BAM/CRAM files (PR #35). - `samtools/samtools_sort`: Sort SAM/BAM/CRAM files (PR #36). - `samtools/samtools_stats`: Reports alignment summary statistics for a BAM file (PR #39). - - `samtools/samtools_stats`: Indexes FASTA files to enable random access to fasta and fastq files (PR #41). + - `samtools/samtools_faidx`: Indexes FASTA files to enable random access to fasta and fastq files (PR #41). + - `samtools/samtools_fastq`: Converts a SAM/BAM/CRAM file to FASTQ (PR #49). * `falco`: A C++ drop-in replacement of FastQC to assess the quality of sequence read data (PR #43). diff --git a/src/samtools/samtools_fastq/config.vsh.yaml b/src/samtools/samtools_fastq/config.vsh.yaml index 93f82a77..39e926f0 100644 --- a/src/samtools/samtools_fastq/config.vsh.yaml +++ b/src/samtools/samtools_fastq/config.vsh.yaml @@ -1,6 +1,6 @@ name: samtools_fastq namespace: samtools -description: convert a SAM/BAM/CRAM file to FASTQ. +description: Converts a SAM, BAM or CRAM to FASTQ format. keywords: [fastq, bam, sam, cram] links: homepage: https://www.htslib.org/ @@ -23,6 +23,7 @@ argument_groups: type: file description: output FASTQ file required: true + direction: output - name: Options arguments: - name: --no_suffix @@ -58,29 +59,33 @@ argument_groups: TAGLIST can be blank or * to indicate all tags should be copied to the output. If using *, be careful to quote it to avoid unwanted shell expansion. - name: --read1 - alternatives: "-1" + alternatives: -1 type: file description: | Write reads with the READ1 FLAG set (and READ2 not set) to FILE instead of outputting them. If the -s option is used, only paired reads will be written to this file. 
+ direction: output - name: --read2 - alternatives: "-2" + alternatives: -2 type: file description: | Write reads with the READ2 FLAG set (and READ1 not set) to FILE instead of outputting them. If the -s option is used, only paired reads will be written to this file. + direction: output - name: --output_reads alternatives: -o type: file description: | Write reads with either READ1 FLAG or READ2 flag set to FILE instead of outputting them to stdout. This is equivalent to -1 FILE -2 FILE. + direction: output - name: --output_reads_both alternatives: -0 type: file description: | Write reads where the READ1 and READ2 FLAG bits set are either both set or both unset to FILE instead of outputting them. + direction: output - name: --filter_flags alternatives: -f type: integer @@ -90,8 +95,8 @@ argument_groups: (i.e. /^0[0-7]+/). default: 0 - name: --excl_flags - alternatives: "-F" - type: integer + alternatives: -F + type: string description: | Do not output alignments with any bits set in INT present in the FLAG field. INT can be specified in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/) or in octal by beginning with `0' @@ -99,8 +104,8 @@ argument_groups: supplementary alignments. default: 0x900 - name: --incl_flags - alternatives: "--rf" - type: integer + alternatives: --rf + type: string description: | Only output alignments with any bits set in INT present in the FLAG field. INT can be specified in hex by beginning with `0x' (i.e. 
/^0x[0-9A-F]+/), in octal by beginning with `0' diff --git a/src/samtools/samtools_fastq/test.sh b/src/samtools/samtools_fastq/test.sh index 13b3ae66..32ee3f5e 100644 --- a/src/samtools/samtools_fastq/test.sh +++ b/src/samtools/samtools_fastq/test.sh @@ -1,36 +1,96 @@ #!/bin/bash test_dir="${meta_resources_dir}/test_data" -out_dir="${meta_resources_dir}/tmp" +out_dir="${meta_resources_dir}/out_data" ############################################################################################ -## example 1: samtools fastq -0 /dev/null in_name.bam > all_reads.fq -## example 2: samtools fastq -0 /dev/null -s single.fq -N in_name.bam > paired.fq -## example 3: samtools fastq with fasta output?? -## example 4: samtools fastq with compressed input? -## example 5: samtools fastq with no suffix? +echo ">>> Test 1: Convert all reads from a bam file to fastq format" +"$meta_executable" \ + --input "$test_dir/a.bam" \ + --output "$out_dir/a.fq" + +echo ">>> Check if output file exists" +[ ! -f "$out_dir/a.fq" ] && echo "Output file a.fq does not exist" && exit 1 +echo ">>> Check if output is empty" +[ ! -s "$out_dir/a.fq" ] && echo "Output file a.fq is empty" && exit 1 + +echo ">>> Check if output matches expected output" +diff "$out_dir/a.fq" "$test_dir/a.fq" || + { echo "Output file a.fq does not match expected output"; exit 1; } -echo ">>> Test 1: Sorting a BAM file" +rm "$out_dir/a.fq" +############################################################################################ + +echo ">>> Test 2: Convert all reads from a sam file to fastq format" "$meta_executable" \ - --input "$test_dir/a.bam" \ - --output "$test_dir/a.sorted.bam" + --input "$test_dir/a.sam" \ + --output "$out_dir/a.fq" echo ">>> Check if output file exists" -[ ] \ - && echo "Output file a.sorted.bam does not exist" && exit 1 +[ ! -f "$out_dir/a.fq" ] && echo "Output file a.fq does not exist" && exit 1 echo ">>> Check if output is empty" +[ ! 
-s "$out_dir/a.fq" ] && echo "Output file a.fq is empty" && exit 1 echo ">>> Check if output matches expected output" +diff "$out_dir/a.fq" "$test_dir/a.fq" || + { echo "Output file a.fq does not match expected output"; exit 1; } +rm "$out_dir/a.fq" ############################################################################################ +echo ">>> Test 3: Output reads from bam file to separate files" + +"$meta_executable" \ + --input "$test_dir/a.bam" \ + --read1 "$out_dir/a.1.fq" \ + --read2 "$out_dir/a.2.fq" \ + --output "$out_dir/a.fq" + +echo ">>> Check if output files exist" +[ ! -f "$out_dir/a.1.fq" ] && echo "Output file a.1.fq does not exist" && exit 1 +[ ! -f "$out_dir/a.2.fq" ] && echo "Output file a.2.fq does not exist" && exit 1 +[ ! -f "$out_dir/a.fq" ] && echo "Output file a.fq does not exist" && exit 1 + +echo ">>> Check if output files are empty" +[ ! -s "$out_dir/a.1.fq" ] && echo "Output file a.1.fq is empty" && exit 1 +[ ! -s "$out_dir/a.2.fq" ] && echo "Output file a.2.fq is empty" && exit 1 +# output should be empty since input has no singleton reads + +echo ">>> Check if output files match expected output" +diff "$out_dir/a.1.fq" "$test_dir/a.1.fq" || + { echo "Output file a.1.fq does not match expected output"; exit 1; } +diff "$out_dir/a.2.fq" "$test_dir/a.2.fq" || + { echo "Output file a.2.fq does not match expected output"; exit 1; } + +rm "$out_dir/a.1.fq" "$out_dir/a.2.fq" "$out_dir/a.fq" + ############################################################################################ +echo ">>> Test 4: Output only forward reads from sam file to fastq format" + +"$meta_executable" \ + --input "$test_dir/a.sam" \ + --excl_flags "0x80" \ + --output "$out_dir/half.fq" + +echo ">>> Check if output file exists" +[ ! -f "$out_dir/half.fq" ] && echo "Output file half.fq does not exist" && exit 1 + +echo ">>> Check if output is empty" +[ ! 
-s "$out_dir/half.fq" ] && echo "Output file half.fq is empty" && exit 1 + +echo ">>> Check if output matches expected output" +diff "$out_dir/half.fq" "$test_dir/half.fq" || + { echo "Output file half.fq does not match expected output"; exit 1; } + +rm "$out_dir/half.fq" + +############################################################################################ echo "All tests succeeded!" exit 0 \ No newline at end of file diff --git a/src/samtools/samtools_fastq/test_data/a.1.fq b/src/samtools/samtools_fastq/test_data/a.1.fq new file mode 100644 index 00000000..03eaa725 --- /dev/null +++ b/src/samtools/samtools_fastq/test_data/a.1.fq @@ -0,0 +1,12 @@ +@a1 +AAAAAAAAAA ++ +********** +@b1 +AAAAAAAAAA ++ +********** +@c1 +AAAAAAAAAA ++ +********** diff --git a/src/samtools/samtools_fastq/test_data/a.2.fq b/src/samtools/samtools_fastq/test_data/a.2.fq new file mode 100644 index 00000000..03eaa725 --- /dev/null +++ b/src/samtools/samtools_fastq/test_data/a.2.fq @@ -0,0 +1,12 @@ +@a1 +AAAAAAAAAA ++ +********** +@b1 +AAAAAAAAAA ++ +********** +@c1 +AAAAAAAAAA ++ +********** diff --git a/src/samtools/samtools_fastq/test_data/a.bam b/src/samtools/samtools_fastq/test_data/a.bam new file mode 100644 index 00000000..dba1268a Binary files /dev/null and b/src/samtools/samtools_fastq/test_data/a.bam differ diff --git a/src/samtools/samtools_fastq/test_data/a.fq b/src/samtools/samtools_fastq/test_data/a.fq new file mode 100644 index 00000000..d12c62ca --- /dev/null +++ b/src/samtools/samtools_fastq/test_data/a.fq @@ -0,0 +1,24 @@ +@a1/1 +AAAAAAAAAA ++ +********** +@b1/1 +AAAAAAAAAA ++ +********** +@c1/1 +AAAAAAAAAA ++ +********** +@a1/2 +AAAAAAAAAA ++ +********** +@b1/2 +AAAAAAAAAA ++ +********** +@c1/2 +AAAAAAAAAA ++ +********** diff --git a/src/samtools/samtools_fastq/test_data/half.fq b/src/samtools/samtools_fastq/test_data/half.fq new file mode 100644 index 00000000..85a2b1c4 --- /dev/null +++ b/src/samtools/samtools_fastq/test_data/half.fq @@ -0,0 +1,12 @@ +@a1/1 
+AAAAAAAAAA ++ +********** +@b1/1 +AAAAAAAAAA ++ +********** +@c1/1 +AAAAAAAAAA ++ +********** diff --git a/src/samtools/samtools_fastq/test_data/script.sh b/src/samtools/samtools_fastq/test_data/script.sh index 7826933d..b59bc1bd 100755 --- a/src/samtools/samtools_fastq/test_data/script.sh +++ b/src/samtools/samtools_fastq/test_data/script.sh @@ -5,4 +5,7 @@ if [ ! -d /tmp/fastq_source ]; then git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers.git /tmp/fastq_source fi -cp -r /tmp/fastq_source/bio/samtools/fastx/test/*.sam src/samtools/samtools_fastq/test_data/ \ No newline at end of file +cp -r /tmp/fastq_source/bio/samtools/fastx/test/*.sam src/samtools/samtools_fastq/test_data/ +cp -r /tmp/fastq_source/bio/samtools/fastq/interleaved/test/mapped/*.bam src/samtools/samtools_fastq/test_data/ +cp -r /tmp/fastq_source/bio/samtools/fastq/interleaved/test/reads/*.fq src/samtools/samtools_fastq/test_data/ +cp -r /tmp/fastq_source/bio/samtools/fastq/separate/test/reads/*.fq src/samtools/samtools_fastq/test_data/ \ No newline at end of file