diff --git a/CHANGELOG.md b/CHANGELOG.md index d51fcf12..f2892df6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,11 @@ * `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). +* `bedtools`: + - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). + - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). + - `bedtools/bedtools_bamtofastq`: Convert BAM alignments to FASTQ files (PR #101). + ## MINOR CHANGES * `busco` components: update BUSCO to `5.7.1` (PR #72). diff --git a/src/bedtools/bedtools_bamtofastq/config.vsh.yaml b/src/bedtools/bedtools_bamtofastq/config.vsh.yaml new file mode 100644 index 00000000..cd30cb71 --- /dev/null +++ b/src/bedtools/bedtools_bamtofastq/config.vsh.yaml @@ -0,0 +1,74 @@ +name: bedtools_bamtofastq +namespace: bedtools +description: | + Conversion tool for extracting FASTQ records from sequence alignments in BAM format. +keywords: [Conversion, BAM, FASTQ] +links: + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/bamtofastq.html + repository: https://github.com/arq5x/bedtools2 + homepage: https://bedtools.readthedocs.io/en/latest/# + issue_tracker: https://github.com/arq5x/bedtools2/issues +references: + doi: 10.1093/bioinformatics/btq033 +license: MIT +requirements: + commands: [bedtools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + alternatives: -i + type: file + description: Input BAM file to be converted to FASTQ. + required: true + + - name: Outputs + arguments: + - name: --fastq + alternatives: -fq + direction: output + type: file + description: Output FASTQ file. + required: true + + - name: --fastq2 + alternatives: -fq2 + type: file + direction: output + description: | + FASTQ for second end. Used if BAM contains paired-end data. 
+ BAM should be sorted by query name if creating paired FASTQ. + + - name: Options + arguments: + - name: --tags + type: boolean_true + description: | + Create FASTQ based on the mate info in the BAM R2 and Q2 tags. + +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: test_data + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bedtools, procps] + - type: docker + run: | + echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/bedtools/bedtools_bamtofastq/help.txt b/src/bedtools/bedtools_bamtofastq/help.txt new file mode 100644 index 00000000..ed2b7468 --- /dev/null +++ b/src/bedtools/bedtools_bamtofastq/help.txt @@ -0,0 +1,25 @@ +```bash +bedtools bamtofastq +``` + +Tool: bedtools bamtofastq (aka bamToFastq) +Version: v2.30.0 +Summary: Convert BAM alignments to FASTQ files. + +Usage: bamToFastq [OPTIONS] -i -fq + +Options: + -fq2 FASTQ for second end. Used if BAM contains paired-end data. + BAM should be sorted by query name is creating paired FASTQ. + + -tags Create FASTQ based on the mate info + in the BAM R2 and Q2 tags. + +Tips: + If you want to create a single, interleaved FASTQ file + for paired-end data, you can just write both to /dev/stdout: + + bedtools bamtofastq -i x.bam -fq /dev/stdout -fq2 /dev/stdout > x.ilv.fq + + Also, the samtools fastq command has more fucntionality and is a useful alternative. 
+
diff --git a/src/bedtools/bedtools_bamtofastq/script.sh b/src/bedtools/bedtools_bamtofastq/script.sh
new file mode 100644
index 00000000..4b32f2d7
--- /dev/null
+++ b/src/bedtools/bedtools_bamtofastq/script.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+# Exit on error, including failures inside pipelines
+set -eo pipefail
+
+# When --tags was not requested (value "false"), unset the variable so that
+# ${par_tags:+-tags} below expands to nothing.
+[[ "$par_tags" == "false" ]] && unset par_tags
+
+# Execute bedtools bamtofastq with the provided arguments.
+# The ${var:+...} expansions are intentionally left unquoted so that an unset
+# option disappears from the command line; the inner "$par_fastq2" stays quoted.
+bedtools bamtofastq \
+  ${par_tags:+-tags} \
+  ${par_fastq2:+-fq2 "$par_fastq2"} \
+  -i "$par_input" \
+  -fq "$par_fastq"
diff --git a/src/bedtools/bedtools_bamtofastq/test.sh b/src/bedtools/bedtools_bamtofastq/test.sh
new file mode 100644
index 00000000..6d913d85
--- /dev/null
+++ b/src/bedtools/bedtools_bamtofastq/test.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+
+# exit on error
+set -eo pipefail
+
+# directory with the input BAM and the expected FASTQ files
+test_data="$meta_resources_dir/test_data"
+
+#############################################
+# helper functions
+#
+# Each helper prints a message to stderr and aborts the test run on failure.
+# The failure branch is a { ...; } group, not a ( ... ) subshell, so that
+# `exit 1` ends this script itself instead of only a child shell.
+
+# $1: path that must be an existing regular file
+assert_file_exists() {
+  [ -f "$1" ] || { echo "File '$1' does not exist" >&2; exit 1; }
+}
+
+# $1: path that must exist with a size greater than zero
+assert_file_not_empty() {
+  [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" >&2; exit 1; }
+}
+
+# $1: file to search, $2: grep pattern that must match at least one line
+assert_file_contains() {
+  grep -q -- "$2" "$1" || { echo "File '$1' does not contain '$2'" >&2; exit 1; }
+}
+
+# $1: produced file, $2: reference file; diff prints any differing lines
+assert_identical_content() {
+  diff -a "$2" "$1" || { echo "Files are not identical!" >&2; exit 1; }
+}
+#############################################
+
+# Test 1: normal conversion
+mkdir test1
+cd test1
+
+echo "> Run bedtools bamtofastq on BAM file"
+"$meta_executable" \
+  --input "$test_data/example.bam" \
+  --fastq "output.fastq"
+
+# checks
+assert_file_exists "output.fastq"
+assert_file_not_empty "output.fastq"
+assert_identical_content "output.fastq" "$test_data/expected.fastq"
+echo "- test1 succeeded -"
+
+cd ..
+
+# Test 2: with tags (compared against the same reference file as test 1)
+mkdir test2
+cd test2
+
+echo "> Run bedtools bamtofastq on BAM file with tags"
+"$meta_executable" \
+  --input "$test_data/example.bam" \
+  --fastq "output.fastq" \
+  --tags
+
+# checks
+assert_file_exists "output.fastq"
+assert_file_not_empty "output.fastq"
+assert_identical_content "output.fastq" "$test_data/expected.fastq"
+echo "- test2 succeeded -"
+
+cd ..
+
+# Test 3: with option fq2 (one output file per mate)
+mkdir test3
+cd test3
+
+echo "> Run bedtools bamtofastq on BAM file with output_fq2"
+"$meta_executable" \
+  --input "$test_data/example.bam" \
+  --fastq "output1.fastq" \
+  --fastq2 "output2.fastq"
+
+# checks
+assert_file_exists "output1.fastq"
+assert_file_not_empty "output1.fastq"
+assert_identical_content "output1.fastq" "$test_data/expected_1.fastq"
+assert_file_exists "output2.fastq"
+assert_file_not_empty "output2.fastq"
+assert_identical_content "output2.fastq" "$test_data/expected_2.fastq"
+echo "- test3 succeeded -"
+
+cd ..
+
+echo "All tests succeeded"
+exit 0
diff --git a/src/bedtools/bedtools_bamtofastq/test_data/example.bam b/src/bedtools/bedtools_bamtofastq/test_data/example.bam
new file mode 100644
index 00000000..ffc075ab
Binary files /dev/null and b/src/bedtools/bedtools_bamtofastq/test_data/example.bam differ
diff --git a/src/bedtools/bedtools_bamtofastq/test_data/example.sam b/src/bedtools/bedtools_bamtofastq/test_data/example.sam
new file mode 100644
index 00000000..4afb0aef
--- /dev/null
+++ b/src/bedtools/bedtools_bamtofastq/test_data/example.sam
@@ -0,0 +1,3 @@
+@SQ SN:chr2:172936693-172938111 LN:1418
+my_read 99 chr2:172936693-172938111 129 60 100M = 429 400 CTAACTAGCCTGGGAAAAAAGGATAGTGTCTCTCTGTTCTTTCATAGGAAATGTTGAATCAGACCCCTACTGGGAAAAGAAATTTAATGCATATCTCACT * XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:100
+my_read 147 chr2:172936693-172938111 429 60 100M = 129 -400 TCGAGCTCTGCATTCATGGCTGTGTCTAAAGGGCATGTCAGCCTTTGATTCTCTCTGAGAGGTAATTATCCTTTTCCTGTCACGGAACAACAAATGATAG * XT:A:U NM:i:0 SM:i:37 AM:i:37 
X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:100 diff --git a/src/bedtools/bedtools_bamtofastq/test_data/expected.fastq b/src/bedtools/bedtools_bamtofastq/test_data/expected.fastq new file mode 100644 index 00000000..358331d4 --- /dev/null +++ b/src/bedtools/bedtools_bamtofastq/test_data/expected.fastq @@ -0,0 +1,16 @@ +@my_read +CTAACTAGCCTGGGAAAAAAGGATAGTGTCTCTCTGTTCTTTCATAGGAAATGTTGAATCAGACCCCTACTGGGAAAAGAAATTTAATGCATATCTCACT ++ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +@my_read +CTAACTAGCCTGGGAAAAAAGGATAGTGTCTCTCTGTTCTTTCATAGGAAATGTTGAATCAGACCCCTACTGGGAAAAGAAATTTAATGCATATCTCACT ++ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +@my_read +CTATCATTTGTTGTTCCGTGACAGGAAAAGGATAATTACCTCTCAGAGAGAATCAAAGGCTGACATGCCCTTTAGACACAGCCATGAATGCAGAGCTCGA ++ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +@my_read +CTATCATTTGTTGTTCCGTGACAGGAAAAGGATAATTACCTCTCAGAGAGAATCAAAGGCTGACATGCCCTTTAGACACAGCCATGAATGCAGAGCTCGA ++ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! diff --git a/src/bedtools/bedtools_bamtofastq/test_data/expected_1.fastq b/src/bedtools/bedtools_bamtofastq/test_data/expected_1.fastq new file mode 100644 index 00000000..c5dfe571 --- /dev/null +++ b/src/bedtools/bedtools_bamtofastq/test_data/expected_1.fastq @@ -0,0 +1,4 @@ +@my_read/1 +CTAACTAGCCTGGGAAAAAAGGATAGTGTCTCTCTGTTCTTTCATAGGAAATGTTGAATCAGACCCCTACTGGGAAAAGAAATTTAATGCATATCTCACT ++ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
diff --git a/src/bedtools/bedtools_bamtofastq/test_data/expected_2.fastq b/src/bedtools/bedtools_bamtofastq/test_data/expected_2.fastq
new file mode 100644
index 00000000..50d1bd62
--- /dev/null
+++ b/src/bedtools/bedtools_bamtofastq/test_data/expected_2.fastq
@@ -0,0 +1,4 @@
+@my_read/2
+CTATCATTTGTTGTTCCGTGACAGGAAAAGGATAATTACCTCTCAGAGAGAATCAAAGGCTGACATGCCCTTTAGACACAGCCATGAATGCAGAGCTCGA
++
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
diff --git a/src/bedtools/bedtools_bamtofastq/test_data/script.sh b/src/bedtools/bedtools_bamtofastq/test_data/script.sh
new file mode 100755
index 00000000..df595510
--- /dev/null
+++ b/src/bedtools/bedtools_bamtofastq/test_data/script.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# Rebuilds the fixtures for the bedtools_bamtofastq tests. All paths are
+# relative, so the files are written to the current working directory.
+
+# create sam file: one @SQ header line and two records for my_read
+{
+  printf "@SQ\tSN:chr2:172936693-172938111\tLN:1418\n"
+  printf "my_read\t99\tchr2:172936693-172938111\t129\t60\t100M\t=\t429\t400\tCTAACTAGCCTGGGAAAAAAGGATAGTGTCTCTCTGTTCTTTCATAGGAAATGTTGAATCAGACCCCTACTGGGAAAAGAAATTTAATGCATATCTCACT\t*\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:100\n"
+  printf "my_read\t147\tchr2:172936693-172938111\t429\t60\t100M\t=\t129\t-400\tTCGAGCTCTGCATTCATGGCTGTGTCTAAAGGGCATGTCAGCCTTTGATTCTCTCTGAGAGGTAATTATCCTTTTCCTGTCACGGAACAACAAATGATAG\t*\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:100\n"
+} > example.sam
+
+# create bam file (step is commented out; this script does not run it)
+# samtools view -b example.sam > example.bam
+
+# create fastq files (steps are commented out; this script does not run them)
+# bedtools bamtofastq -i example.bam -fq expected.fastq
+# bedtools bamtofastq -i example.bam -fq expected_1.fastq -fq2 expected_2.fastq