Skip to content

Commit

Permalink
initial commit faidx
Browse files Browse the repository at this point in the history
  • Loading branch information
emmarousseau committed Apr 13, 2024
1 parent 5ea8c78 commit 9b21fd4
Show file tree
Hide file tree
Showing 10 changed files with 234 additions and 0 deletions.
93 changes: 93 additions & 0 deletions src/samtools/samtools_faidx/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Viash component definition wrapping `samtools faidx`.
# Argument names map onto the long options printed by `samtools faidx -h`
# (dashes replaced by underscores); short alternatives mirror that help text.
name: samtools_faidx
namespace: samtools
description: Indexes FASTA files to enable random access to fasta and fastq files.
keywords: [index, fasta, faidx]
links:
  homepage: https://www.htslib.org/
  documentation: https://www.htslib.org/doc/faidx.html
  repository: https://github.com/samtools/samtools
references:
  doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
license: MIT/Expat

argument_groups:
  - name: Inputs
    arguments:
      # Passed to samtools as the positional <file.fa|file.fa.gz> argument;
      # script.sh always forwards it, so it has to be supplied.
      - name: --fasta
        type: file
        description: |
          FASTA input file.
        required: true
        example: file.fa
      - name: --length
        alternatives: -n
        type: integer
        description: |
          Length of FASTA sequence line.
        default: 60
      - name: --region_file
        alternatives: -r
        type: file
        description: |
          File of regions. Format is chr:from-to. One per line.

  - name: Options
    arguments:
      - name: --continue
        alternatives: -c
        type: boolean_true
        description: |
          Continue after trying to retrieve missing region.
      - name: --reverse_complement
        alternatives: -i
        type: boolean_true
        description: |
          Reverse complement sequences.
      # Free-form string rather than a fixed choice list because the
      # `custom,<pos>,<neg>` form carries user-defined indicators.
      - name: --mark_strand
        type: string
        description: |
          Add strand indicator to sequence name. Options are:
          [ rc, no, sign, custom,<pos>,<neg> ]
        default: rc
      - name: --fastq
        alternatives: -f
        type: boolean_true
        description: |
          File and index in FASTQ format.

  - name: Outputs
    arguments:
      - name: --output
        alternatives: -o
        type: file
        description: |
          Write FASTA to file.
        required: true
        direction: output
        example: output.fasta
      - name: --fai_idx
        type: file
        description: |
          Name of the index file (default file.fa.fai).
        direction: output
        example: file.fa.fai
      - name: --gzi_idx
        type: file
        description: |
          Name of compressed file index (default file.fa.gz.gzi).
        direction: output
        example: file.fa.gz.gzi

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - type: file
    path: test_data

engines:
  - type: docker
    image: quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1
    setup:
      # Record the samtools and htslib versions inside the image.
      - type: docker
        run: |
          samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \
            sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt

runners:
  - type: executable
  - type: nextflow
19 changes: 19 additions & 0 deletions src/samtools/samtools_faidx/help.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
```sh
samtools faidx -h
```
Usage: samtools faidx <file.fa|file.fa.gz> [<reg> [...]]
Option:
-o, --output FILE Write FASTA to file.
-n, --length INT Length of FASTA sequence line. [60]
-c, --continue Continue after trying to retrieve missing region.
-r, --region-file FILE File of regions. Format is chr:from-to. One per line.
-i, --reverse-complement Reverse complement sequences.
--mark-strand TYPE Add strand indicator to sequence name
TYPE = rc for /rc on negative strand (default)
no for no strand indicator
sign for (+) / (-)
custom,<pos>,<neg> for custom indicator
--fai-idx FILE name of the index file (default file.fa.fai).
--gzi-idx FILE name of compressed file index (default file.fa.gz.gzi).
-f, --fastq File and index in FASTQ format.
-h, --help This message.
24 changes: 24 additions & 0 deletions src/samtools/samtools_faidx/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash

## VIASH START
## VIASH END

# Thin wrapper that translates the component's par_* variables into a
# `samtools faidx` command line. Flags follow the tool's own help text:
#   -n line length, -c continue, -r region file, -i reverse complement,
#   -f FASTQ mode, plus the long-only --mark-strand / --fai-idx / --gzi-idx.

set -e

# Flags that were not requested hold the literal string "false" (that is what
# these comparisons test for). Unsetting them lets the ${var:+...} expansions
# below drop the corresponding option entirely.
[[ "$par_continue" == "false" ]] && unset par_continue
[[ "$par_reverse_complement" == "false" ]] && unset par_reverse_complement
[[ "$par_fastq" == "false" ]] && unset par_fastq

# Each ${par_x:+...} expands to nothing when par_x is unset or empty, so only
# options that were actually provided reach samtools.
samtools faidx \
    "$par_fasta" \
    -o "$par_output" \
    ${par_length:+-n "$par_length"} \
    ${par_continue:+-c} \
    ${par_region_file:+-r "$par_region_file"} \
    ${par_reverse_complement:+-i} \
    ${par_mark_strand:+--mark-strand "$par_mark_strand"} \
    ${par_fai_idx:+--fai-idx "$par_fai_idx"} \
    ${par_gzi_idx:+--gzi-idx "$par_gzi_idx"} \
    ${par_fastq:+-f}

exit 0
39 changes: 39 additions & 0 deletions src/samtools/samtools_faidx/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash

# Component test for samtools_faidx.
# Test 1 indexes test_data/test.fasta and compares the index with the
# reference test_data/test1.fasta.fai.
# Test 2 extracts a region through --region_file and checks the FASTA output.

# Abort as soon as the component itself fails instead of only noticing later
# through a missing file.
set -e

test_dir="${meta_resources_dir}/test_data"
echo ">>> Testing $meta_functionality_name"

echo ">>> Test 1: index a FASTA file"

# Arguments use the names declared in config.vsh.yaml (--fasta, --output).
"$meta_executable" \
    --fasta "$test_dir/test.fasta" \
    --output "$test_dir/test.fasta.fai"

echo ">>> Checking whether output exists"
if [ ! -f "$test_dir/test.fasta.fai" ]; then
    echo "File 'test.fasta.fai' does not exist!"
    exit 1
fi

echo ">>> Checking whether output is non-empty"
if [ ! -s "$test_dir/test.fasta.fai" ]; then
    echo "File 'test.fasta.fai' is empty!"
    exit 1
fi

echo ">>> Checking whether output is correct"
if ! diff "$test_dir/test.fasta.fai" "$test_dir/test1.fasta.fai"; then
    echo "Output file test.fasta.fai does not match expected output"
    exit 1
fi

echo ">>> Test 2: extract a region listed in a region file"

# One region in the chr:from-to format described for --region_file:
# the first ten bases of the first record in test.fasta.
printf 'YAL069W:1-10\n' > "$test_dir/regions.txt"

"$meta_executable" \
    --fasta "$test_dir/test.fasta" \
    --region_file "$test_dir/regions.txt" \
    --length 60 \
    --continue \
    --output "$test_dir/regions.fasta"

echo ">>> Checking whether output exists"
if [ ! -f "$test_dir/regions.fasta" ]; then
    echo "File 'regions.fasta' does not exist!"
    exit 1
fi

echo ">>> Checking whether output is non-empty"
if [ ! -s "$test_dir/regions.fasta" ]; then
    echo "File 'regions.fasta' is empty!"
    exit 1
fi

echo ">>> Checking whether output is correct"
if ! diff "$test_dir/regions.fasta" <(printf '>YAL069W:1-10\nATGATCGTAA\n'); then
    echo "Output file regions.fasta does not match expected output"
    exit 1
fi

# Remove files created by this test run.
rm "$test_dir/regions.txt" "$test_dir/regions.fasta"

echo "All tests succeeded!"
exit 0
10 changes: 10 additions & 0 deletions src/samtools/samtools_faidx/test_data/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

## VIASH START
## VIASH END

# Regenerates test.fasta from the nf-core rnaseq3 reference transcriptome.

# Stop on the first failure so a failed download cannot leave behind an
# empty or stale test.fasta.
set -euo pipefail

# -O pins the download name; without it wget would write
# transcriptome.fasta.1 when a previous copy is still present.
wget -O transcriptome.fasta \
    https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/transcriptome.fasta

head -n 23 transcriptome.fasta > test.fasta # keep only the first 4 entries of the file for testing.

rm transcriptome.fasta
23 changes: 23 additions & 0 deletions src/samtools/samtools_faidx/test_data/test.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
>YAL069W CDS=1-315
ATGATCGTAAATAACACACACGTGCTTACCCTACCACTTTATACCACCACCACATGCCATACTCACCCTC
ACTTGTATACTGATTTTACGTACGCACACGGATGCTACAGTATATACCATCTCAAACTTACCCTACTCTC
AGATTCCACTTCACTCCATGGCCCATCTCTCACTGAATCAGTACCAAATGCACTCACATCATTATGCACG
GCACTTGCCTCAGCGGTCTATACCCTGTGCCATTTACCCATAACGCCCATCATTATCCACATTTTGATAT
CTATATCTCATTCGGCGGTCCCAAATATTGTATAA
>YAL068W-A CDS=1-255
ATGCACGGCACTTGCCTCAGCGGTCTATACCCTGTGCCATTTACCCATAACGCCCATCATTATCCACATT
TTGATATCTATATCTCATTCGGCGGTCCCAAATATTGTATAACTGCCCTTAATACATACGTTATACCACT
TTTGCACCATATACTTACCACTCCATTTATATACACTTATGTCAATATTACAGAAAAATCCCCACAAAAA
TCACCTAAACATAAAAATATTCTACTTTTCAACAATAATACATAA
>YAL068C CDS=1-363
ATGGTCAAATTAACTTCAATCGCCGCTGGTGTCGCTGCCATCGCTGCTACTGCTTCTGCAACCACCACTC
TAGCTCAATCTGACGAAAGAGTCAACTTGGTGGAATTGGGTGTCTACGTCTCTGATATCAGAGCTCACTT
AGCCCAATACTACATGTTCCAAGCCGCCCACCCAACTGAAACCTACCCAGTCGAAGTTGCTGAAGCCGTT
TTCAACTACGGTGACTTCACCACCATGTTGACCGGTATTGCTCCAGACCAAGTGACCAGAATGATCACCG
GTGTTCCATGGTACTCCAGCAGATTAAAGCCAGCCATCTCCAGTGCTCTATCCAAGGACGGTATCTACAC
TATCGCAAACTAG
>YAL067W-A CDS=1-228
ATGCCAATTATAGGGGTGCCGAGGTGCCTTATAAAACCCTTTTCTGTGCCTGTGACATTTCCTTTTTCGG
TCAAAAAGAATATCCGAATTTTAGATTTGGACCCTCGTACAGAAGCTTATTGTCTAAGCCTGAATTCAGT
CTGCTTTAAACGGCTTCCGCGGAGGAAATATTTCCATCTCTTGAATTCGTACAACATTAAACGTGTGTTG
GGAGTCGTATACTGTTAG
14 changes: 14 additions & 0 deletions src/samtools/samtools_faidx/test_data/test.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@fastq1
ATGCATGCATGCATGCATGCATGCATGCAT
GCATGCATGCATGCATGCATGCATGCATGC
ATGCAT
+
FFFA@@FFFFFFFFFFHHB:::@BFFFFGG
HIHIIIIIIIIIIIIIIIIIIIIIIIFFFF
8011<<
@fastq2
ATGCATGCATGCAT
GCATGCATGCATGC
+
IIA94445EEII==
=>IIIIIIIIICCC
4 changes: 4 additions & 0 deletions src/samtools/samtools_faidx/test_data/test1.fasta.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
YAL069W 315 19 70 71
YAL068W-A 255 360 70 71
YAL068C 363 638 70 71
YAL067W-A 228 1028 70 71
4 changes: 4 additions & 0 deletions src/samtools/samtools_faidx/test_data/test2.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
YAL069W 315 19 70 71
YAL068W-A 255 360 70 71
YAL068C 363 638 70 71
YAL067W-A 228 1028 70 71
4 changes: 4 additions & 0 deletions src/samtools/samtools_faidx/test_data/test_out.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
YAL069W 315 19 70 71
YAL068W-A 255 360 70 71
YAL068C 363 638 70 71
YAL067W-A 228 1028 70 71

0 comments on commit 9b21fd4

Please sign in to comment.