diff --git a/src/samtools/samtools_faidx/config.vsh.yaml b/src/samtools/samtools_faidx/config.vsh.yaml new file mode 100644 index 00000000..e94ad280 --- /dev/null +++ b/src/samtools/samtools_faidx/config.vsh.yaml @@ -0,0 +1,93 @@ +name: samtools_faidx +namespace: samtools +description: Indexes FASTA files to enable random access to fasta and fastq files. +keywords: [ index, fasta, faidx ] +links: + homepage: https://www.htslib.org/ + documentation: https://www.htslib.org/doc/faidx.html + repository: https://github.com/samtools/samtools +references: + doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] +license: MIT/Expat + +argument_groups: + - name: Inputs + arguments: + - name: --fasta + type: file + description: | + FASTA input file. + - name: --length + alternatives: -n + type: integer + description: | + Length of FASTA sequence line. + default: 60 + - name: --region_file + alternatives: -r + type: file + description: | + File of regions. Format is chr:from-to. One per line. + - name: Options + arguments: + - name: --continue + type: boolean_true + description: | + Continue after trying to retrieve missing region. + - name: --reverse_complement + alternatives: -i + type: boolean_true + description: | + Reverse complement sequences. + - name: Outputs + arguments: + - name: --output + alternatives: -o + type: file + description: | + Write FASTA to file. + required: true + direction: output + example: output.fasta + - name: --mark_strand + type: string + description: | + Add strand indicator to sequence name. Options are: + [ rc, no, sign, custom,<pos>,<neg> ] + default: rc + - name: --fai_idx + type: file + description: | + Name of the index file (default file.fa.fai). + direction: output + example: file.fa.fai + - name: --gzi_idx + type: file + description: | + Name of compressed file index (default file.fa.gz.gzi). + direction: output + example: file.fa.gz.gzi + - name: --fastq + type: boolean_true + description: | + File and index in FASTQ format. 
+ +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1 + setup: + - type: docker + run: | + samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \ + sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt +runners: +- type: executable +- type: nextflow diff --git a/src/samtools/samtools_faidx/help.txt b/src/samtools/samtools_faidx/help.txt new file mode 100644 index 00000000..89320c6f --- /dev/null +++ b/src/samtools/samtools_faidx/help.txt @@ -0,0 +1,19 @@ +```sh +samtools faidx -h +``` +Usage: samtools faidx <file.fa|file.fa.gz> [<reg> [...]] +Option: + -o, --output FILE Write FASTA to file. + -n, --length INT Length of FASTA sequence line. [60] + -c, --continue Continue after trying to retrieve missing region. + -r, --region-file FILE File of regions. Format is chr:from-to. One per line. + -i, --reverse-complement Reverse complement sequences. + --mark-strand TYPE Add strand indicator to sequence name + TYPE = rc for /rc on negative strand (default) + no for no strand indicator + sign for (+) / (-) + custom,<pos>,<neg> for custom indicator + --fai-idx FILE name of the index file (default file.fa.fai). + --gzi-idx FILE name of compressed file index (default file.fa.gz.gzi). + -f, --fastq File and index in FASTQ format. + -h, --help This message. 
\ No newline at end of file
diff --git a/src/samtools/samtools_faidx/script.sh b/src/samtools/samtools_faidx/script.sh
new file mode 100644
index 00000000..2b2f625a
--- /dev/null
+++ b/src/samtools/samtools_faidx/script.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+set -e
+
+# Flags that carry the string "false" are unset so that the ${var:+...}
+# expansions below add nothing to the command line for them.
+[[ "$par_continue" == "false" ]] && unset par_continue
+[[ "$par_reverse_complement" == "false" ]] && unset par_reverse_complement
+[[ "$par_fastq" == "false" ]] && unset par_fastq
+
+# Optional arguments are forwarded only when the matching par_* variable is set.
+# Flag letters follow help.txt: -n length, -r region file, -i reverse complement.
+samtools faidx \
+  "$par_fasta" \
+  -o "$par_output" \
+  ${par_length:+-n "$par_length"} \
+  ${par_continue:+-c} \
+  ${par_region_file:+-r "$par_region_file"} \
+  ${par_reverse_complement:+-i} \
+  ${par_mark_strand:+--mark-strand "$par_mark_strand"} \
+  ${par_fai_idx:+--fai-idx "$par_fai_idx"} \
+  ${par_gzi_idx:+--gzi-idx "$par_gzi_idx"} \
+  ${par_fastq:+-f}
+
+exit 0
\ No newline at end of file
diff --git a/src/samtools/samtools_faidx/test.sh b/src/samtools/samtools_faidx/test.sh
new file mode 100644
index 00000000..5212b35d
--- /dev/null
+++ b/src/samtools/samtools_faidx/test.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Stop at the first failing command so a failed component run fails the test.
+set -e
+
+test_dir="${meta_resources_dir}/test_data"
+echo ">>> Testing $meta_functionality_name"
+
+# Test 1: index test.fasta. Arguments use the names declared in
+# config.vsh.yaml, so the input is passed with --fasta.
+"$meta_executable" \
+  --fasta "$test_dir/test.fasta" \
+  --output "$test_dir/test.fasta.fai" \
+  --continue
+
+echo ">>> Checking whether output exists"
+[ -f "$test_dir/test.fasta.fai" ] || { echo "File 'test.fasta.fai' does not exist!"; exit 1; }
+
+echo ">>> Checking whether output is non-empty"
+[ -s "$test_dir/test.fasta.fai" ] || { echo "File 'test.fasta.fai' is empty!"; exit 1; }
+
+echo ">>> Checking whether output is correct"
+# test1.fasta.fai is the reference index shipped in test_data.
+diff "$test_dir/test.fasta.fai" "$test_dir/test1.fasta.fai" || { echo "Output file test.fasta.fai does not match expected output"; exit 1; }
+
+rm "$test_dir/test.fasta.fai"
+
+echo ">>> Test 2:"
+
+# NOTE for review: --gzi_idx is not passed because test.fasta is uncompressed
+# and presumably no .gzi file would be written for it. Confirm before re-adding.
+"$meta_executable" \
+  --fasta "$test_dir/test.fasta" \
+  --output "$test_dir/test.fasta.fai" \
+  --length 60 \
+  --continue
+
+[ -f "$test_dir/test.fasta.fai" ] || { echo "File 'test.fasta.fai' does not exist!"; exit 1; }
+
+rm "$test_dir/test.fasta.fai"
+
+echo "All tests succeeded!"
+exit 0
\ No newline at end of file
diff --git a/src/samtools/samtools_faidx/test_data/script.sh b/src/samtools/samtools_faidx/test_data/script.sh
new file mode 100644
index 00000000..ffd5b789
--- /dev/null
+++ b/src/samtools/samtools_faidx/test_data/script.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+# Abort if the download fails instead of writing an empty test.fasta.
+set -e
+
+wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/transcriptome.fasta
+
+head -n 23 transcriptome.fasta > test.fasta # keep only the first 4 entries of the file for testing.
+
+rm transcriptome.fasta
\ No newline at end of file
diff --git a/src/samtools/samtools_faidx/test_data/test.fasta b/src/samtools/samtools_faidx/test_data/test.fasta
new file mode 100644
index 00000000..eee04dde
--- /dev/null
+++ b/src/samtools/samtools_faidx/test_data/test.fasta
@@ -0,0 +1,23 @@
+>YAL069W CDS=1-315
+ATGATCGTAAATAACACACACGTGCTTACCCTACCACTTTATACCACCACCACATGCCATACTCACCCTC
+ACTTGTATACTGATTTTACGTACGCACACGGATGCTACAGTATATACCATCTCAAACTTACCCTACTCTC
+AGATTCCACTTCACTCCATGGCCCATCTCTCACTGAATCAGTACCAAATGCACTCACATCATTATGCACG
+GCACTTGCCTCAGCGGTCTATACCCTGTGCCATTTACCCATAACGCCCATCATTATCCACATTTTGATAT
+CTATATCTCATTCGGCGGTCCCAAATATTGTATAA
+>YAL068W-A CDS=1-255
+ATGCACGGCACTTGCCTCAGCGGTCTATACCCTGTGCCATTTACCCATAACGCCCATCATTATCCACATT
+TTGATATCTATATCTCATTCGGCGGTCCCAAATATTGTATAACTGCCCTTAATACATACGTTATACCACT
+TTTGCACCATATACTTACCACTCCATTTATATACACTTATGTCAATATTACAGAAAAATCCCCACAAAAA
+TCACCTAAACATAAAAATATTCTACTTTTCAACAATAATACATAA
+>YAL068C CDS=1-363
+ATGGTCAAATTAACTTCAATCGCCGCTGGTGTCGCTGCCATCGCTGCTACTGCTTCTGCAACCACCACTC
+TAGCTCAATCTGACGAAAGAGTCAACTTGGTGGAATTGGGTGTCTACGTCTCTGATATCAGAGCTCACTT +AGCCCAATACTACATGTTCCAAGCCGCCCACCCAACTGAAACCTACCCAGTCGAAGTTGCTGAAGCCGTT +TTCAACTACGGTGACTTCACCACCATGTTGACCGGTATTGCTCCAGACCAAGTGACCAGAATGATCACCG +GTGTTCCATGGTACTCCAGCAGATTAAAGCCAGCCATCTCCAGTGCTCTATCCAAGGACGGTATCTACAC +TATCGCAAACTAG +>YAL067W-A CDS=1-228 +ATGCCAATTATAGGGGTGCCGAGGTGCCTTATAAAACCCTTTTCTGTGCCTGTGACATTTCCTTTTTCGG +TCAAAAAGAATATCCGAATTTTAGATTTGGACCCTCGTACAGAAGCTTATTGTCTAAGCCTGAATTCAGT +CTGCTTTAAACGGCTTCCGCGGAGGAAATATTTCCATCTCTTGAATTCGTACAACATTAAACGTGTGTTG +GGAGTCGTATACTGTTAG diff --git a/src/samtools/samtools_faidx/test_data/test.fastq b/src/samtools/samtools_faidx/test_data/test.fastq new file mode 100644 index 00000000..b8f8c917 --- /dev/null +++ b/src/samtools/samtools_faidx/test_data/test.fastq @@ -0,0 +1,14 @@ +@fastq1 +ATGCATGCATGCATGCATGCATGCATGCAT +GCATGCATGCATGCATGCATGCATGCATGC +ATGCAT ++ +FFFA@@FFFFFFFFFFHHB:::@BFFFFGG +HIHIIIIIIIIIIIIIIIIIIIIIIIFFFF +8011<< +@fastq2 +ATGCATGCATGCAT +GCATGCATGCATGC ++ +IIA94445EEII== +=>IIIIIIIIICCC \ No newline at end of file diff --git a/src/samtools/samtools_faidx/test_data/test1.fasta.fai b/src/samtools/samtools_faidx/test_data/test1.fasta.fai new file mode 100644 index 00000000..475dde4d --- /dev/null +++ b/src/samtools/samtools_faidx/test_data/test1.fasta.fai @@ -0,0 +1,4 @@ +YAL069W 315 19 70 71 +YAL068W-A 255 360 70 71 +YAL068C 363 638 70 71 +YAL067W-A 228 1028 70 71 diff --git a/src/samtools/samtools_faidx/test_data/test2.fasta b/src/samtools/samtools_faidx/test_data/test2.fasta new file mode 100644 index 00000000..475dde4d --- /dev/null +++ b/src/samtools/samtools_faidx/test_data/test2.fasta @@ -0,0 +1,4 @@ +YAL069W 315 19 70 71 +YAL068W-A 255 360 70 71 +YAL068C 363 638 70 71 +YAL067W-A 228 1028 70 71 diff --git a/src/samtools/samtools_faidx/test_data/test_out.fasta b/src/samtools/samtools_faidx/test_data/test_out.fasta new file mode 100644 index 00000000..475dde4d --- /dev/null +++ 
b/src/samtools/samtools_faidx/test_data/test_out.fasta @@ -0,0 +1,4 @@ +YAL069W 315 19 70 71 +YAL068W-A 255 360 70 71 +YAL068C 363 638 70 71 +YAL067W-A 228 1028 70 71