forked from viash-hub/biobox
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586b. * initial commit faidx * Add tests and adjust script * Update changelog
- Loading branch information
1 parent
4785188
commit 53a1a7f
Showing
16 changed files
with
324 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# Viash component definition wrapping `samtools faidx`.
name: samtools_faidx
namespace: samtools
description: Indexes FASTA files to enable random access to fasta and fastq files.
keywords: [ index, fasta, faidx ]
links:
  homepage: https://www.htslib.org/
  documentation: https://www.htslib.org/doc/samtools-faidx.html
  repository: https://github.com/samtools/samtools
references:
  doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008]
license: MIT/Expat

argument_groups:
  - name: Inputs
    arguments:
      - name: --input
        type: file
        description: |
          FASTA input file.
        # script.sh always passes this value as the positional file argument,
        # so the component cannot run without it.
        required: true
      - name: --length
        alternatives: -n
        type: integer
        description: |
          Length for FASTA sequence line wrapping. If zero, this means do not
          line wrap. Defaults to the line length in the input file.
        # NOTE(review): the description says the tool falls back to the input's
        # line length, but this default makes the component always pass `-n 60`.
        # Confirm which behaviour is intended.
        default: 60
      - name: --region_file
        alternatives: -r
        type: file
        description: |
          File of regions. Format is chr:from-to. One per line.
          Must be used with --output to avoid sending output to stdout.
  - name: Options
    arguments:
      - name: --continue
        type: boolean_true
        description: |
          Continue working if a non-existent region is requested.
      - name: --reverse_complement
        alternatives: -i
        type: boolean_true
        description: |
          Reverse complement sequences.
  - name: Outputs
    arguments:
      - name: --output
        alternatives: -o
        type: file
        description: |
          Write output to file.
        direction: output
        required: true
        example: output.fasta
      - name: --mark_strand
        type: string
        description: |
          Add strand indicator to sequence name. Options are:
          [ rc, no, sign, custom,<pos>,<neg> ]
        default: rc
      - name: --fai_idx
        type: file
        description: |
          Read/Write to specified index file (default file.fa.fai).
        direction: output
        example: file.fa.fai
      - name: --gzi_idx
        type: file
        description: |
          Read/Write to specified compressed file index (used with .gz files, default file.fa.gz.gzi).
        direction: output
        example: file.fa.gz.gzi
      - name: --fastq
        type: boolean_true
        description: |
          Read FASTQ files and output extracted sequences in FASTQ format. Same as using samtools fqidx.
resources:
  - type: bash_script
    path: script.sh
test_resources:
  - type: bash_script
    path: test.sh
  - type: file
    path: test_data
engines:
  - type: docker
    image: quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1
    setup:
      - type: docker
        # Record the samtools and htslib versions shipped in the image.
        run: |
          samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \
          sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt
runners:
  - type: executable
  - type: nextflow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
```sh | ||
samtools faidx -h | ||
``` | ||
Usage: samtools faidx <file.fa|file.fa.gz> [<reg> [...]] | ||
Option: | ||
-o, --output FILE Write FASTA to file. | ||
-n, --length INT Length of FASTA sequence line. [60] | ||
-c, --continue Continue after trying to retrieve missing region. | ||
-r, --region-file FILE File of regions. Format is chr:from-to. One per line. | ||
-i, --reverse-complement Reverse complement sequences. | ||
--mark-strand TYPE Add strand indicator to sequence name | ||
TYPE = rc for /rc on negative strand (default) | ||
no for no strand indicator | ||
sign for (+) / (-) | ||
custom,<pos>,<neg> for custom indicator | ||
--fai-idx FILE name of the index file (default file.fa.fai). | ||
--gzi-idx FILE name of compressed file index (default file.fa.gz.gzi). | ||
-f, --fastq File and index in FASTQ format. | ||
-h, --help This message. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#!/bin/bash

## VIASH START
## VIASH END

# Runs `samtools faidx` on "$par_input", translating the component's par_*
# variables into the matching samtools command-line options.
#
# Reads: par_input, par_output, par_length, par_continue, par_region_file,
#        par_reverse_complement, par_mark_strand, par_fai_idx, par_gzi_idx,
#        par_fastq.
# Exit status: non-zero as soon as any command fails (set -e), otherwise 0.

set -e

# A flag holding the literal string "false" is disabled. Unset it so that the
# ${var:+...} expansions below only emit an option for flags that are enabled.
[[ "$par_continue" == "false" ]] && unset par_continue
[[ "$par_reverse_complement" == "false" ]] && unset par_reverse_complement
[[ "$par_fastq" == "false" ]] && unset par_fastq

# Each ${var:+...} expands to nothing when the variable is unset or empty, and
# to the option (plus its quoted value) otherwise.
# Reverse complement is `-i`; `-r` is the region-file option and would consume
# the following argument as a file name.
samtools faidx \
  "$par_input" \
  ${par_output:+-o "$par_output"} \
  ${par_length:+-n "$par_length"} \
  ${par_continue:+-c} \
  ${par_region_file:+-r "$par_region_file"} \
  ${par_reverse_complement:+-i} \
  ${par_mark_strand:+--mark-strand "$par_mark_strand"} \
  ${par_fai_idx:+--fai-idx "$par_fai_idx"} \
  ${par_gzi_idx:+--gzi-idx "$par_gzi_idx"} \
  ${par_fastq:+-f}

exit 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
#!/bin/bash

# Integration tests for the component under test.
#
# Reads: meta_executable, meta_resources_dir, meta_functionality_name.
# Each test runs the executable on files below test_data, compares the files it
# produced against the references in test_data/output, and then removes the
# produced files again.
# Exit status: 1 on the first failed check, non-zero if a command fails,
# 0 when every test passes.

# Abort as soon as a command fails, so a crashing executable fails the run.
set -e

test_dir="${meta_resources_dir}/test_data"

#######################################
# Print a message and abort the test run.
# Arguments: $1 - message to print
#######################################
fail() {
  echo "$1"
  exit 1
}

#######################################
# Check that every named file exists in test_dir, is non-empty and is
# identical to the reference file of the same name in test_dir/output.
# Globals:   test_dir (read)
# Arguments: names of the produced files, relative to test_dir
#######################################
check_outputs() {
  local name

  echo ">>> Checking whether output exists"
  for name in "$@"; do
    [[ -f "$test_dir/$name" ]] || fail "File '$name' does not exist!"
  done

  echo ">>> Checking whether output is non-empty"
  for name in "$@"; do
    [[ -s "$test_dir/$name" ]] || fail "File '$name' is empty!"
  done

  echo ">>> Checking whether output is correct"
  for name in "$@"; do
    # fail runs in the current shell (no subshell), so its exit really stops
    # the test run when the files differ.
    diff "$test_dir/$name" "$test_dir/output/$name" \
      || fail "Output file $name does not match expected output"
  done
}

#######################################
# Remove the named produced files from test_dir.
# Globals:   test_dir (read)
# Arguments: names of the files to remove, relative to test_dir
#######################################
remove_outputs() {
  local name
  for name in "$@"; do
    rm "$test_dir/$name"
  done
}

echo ">>> Testing $meta_functionality_name"

"$meta_executable" \
  --input "$test_dir/test.fasta" \
  --output "$test_dir/test.fasta.fai"

echo "$meta_executable"
echo "$test_dir/test.fasta"

check_outputs test.fasta.fai
remove_outputs test.fasta.fai

####################################################################################################

echo ">>> Test 2: ${meta_functionality_name} with bgzipped input"

"$meta_executable" \
  --input "$test_dir/test.fasta.gz" \
  --output "$test_dir/test.fasta.gz.fai"

check_outputs test.fasta.gz.fai test.fasta.gz.gzi
remove_outputs test.fasta.gz.fai test.fasta.gz.gzi

####################################################################################################

echo ">>> Test 3: ${meta_functionality_name} with fastq input"

"$meta_executable" \
  --input "$test_dir/test.fastq" \
  --output "$test_dir/test.fastq.fai"

check_outputs test.fastq.fai
remove_outputs test.fastq.fai

####################################################################################################

echo ">>> Test 4: ${meta_functionality_name} with region file containing non-existent regions and
specific fasta line wrap length"

"$meta_executable" \
  --input "$test_dir/test.fasta" \
  --output "$test_dir/regions.fasta" \
  --length 10 \
  --continue \
  --region_file "$test_dir/test.regions" \
  --fai_idx "$test_dir/regions.fasta.fai"

check_outputs regions.fasta regions.fasta.fai
remove_outputs regions.fasta regions.fasta.fai

####################################################################################################

echo "All tests succeeded!"
exit 0
|
14 changes: 14 additions & 0 deletions
14
src/samtools/samtools_faidx/test_data/output/regions.fasta
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
>YAL069W:300-315 | ||
CCCAAATATT | ||
GTATAA | ||
>YAL068C:200-230 | ||
CTGAAGCCGT | ||
TTTCAACTAC | ||
GGTGACTTCA | ||
C | ||
>YAL067W-A:115-145 | ||
GCTTATTGTC | ||
TAAGCCTGAA | ||
TTCAGTCTGC | ||
T | ||
>chr1:1-100 |
4 changes: 4 additions & 0 deletions
4
src/samtools/samtools_faidx/test_data/output/regions.fasta.fai
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
YAL069W 315 19 70 71 | ||
YAL068W-A 255 360 70 71 | ||
YAL068C 363 638 70 71 | ||
YAL067W-A 228 1028 70 71 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
YAL069W 315 19 70 71 | ||
YAL068W-A 255 360 70 71 | ||
YAL068C 363 638 70 71 | ||
YAL067W-A 228 1028 70 71 |
4 changes: 4 additions & 0 deletions
4
src/samtools/samtools_faidx/test_data/output/test.fasta.gz.fai
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
YAL069W 315 19 70 71 | ||
YAL068W-A 255 360 70 71 | ||
YAL068C 363 638 70 71 | ||
YAL067W-A 228 1028 70 71 |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
fastq1 66 8 30 31 79 | ||
fastq2 28 156 14 15 188 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash

## VIASH START
## VIASH END

# Regenerates test.fasta in the current directory from the nf-core rnaseq3
# reference transcriptome. Requires network access and wget.

# Stop at the first failing command, so a failed download does not leave an
# empty test.fasta behind.
set -e

wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/transcriptome.fasta

# Keep only the first 4 entries (23 lines) of the file for testing.
head -n 23 transcriptome.fasta > test.fasta

rm transcriptome.fasta
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
>YAL069W CDS=1-315 | ||
ATGATCGTAAATAACACACACGTGCTTACCCTACCACTTTATACCACCACCACATGCCATACTCACCCTC | ||
ACTTGTATACTGATTTTACGTACGCACACGGATGCTACAGTATATACCATCTCAAACTTACCCTACTCTC | ||
AGATTCCACTTCACTCCATGGCCCATCTCTCACTGAATCAGTACCAAATGCACTCACATCATTATGCACG | ||
GCACTTGCCTCAGCGGTCTATACCCTGTGCCATTTACCCATAACGCCCATCATTATCCACATTTTGATAT | ||
CTATATCTCATTCGGCGGTCCCAAATATTGTATAA | ||
>YAL068W-A CDS=1-255 | ||
ATGCACGGCACTTGCCTCAGCGGTCTATACCCTGTGCCATTTACCCATAACGCCCATCATTATCCACATT | ||
TTGATATCTATATCTCATTCGGCGGTCCCAAATATTGTATAACTGCCCTTAATACATACGTTATACCACT | ||
TTTGCACCATATACTTACCACTCCATTTATATACACTTATGTCAATATTACAGAAAAATCCCCACAAAAA | ||
TCACCTAAACATAAAAATATTCTACTTTTCAACAATAATACATAA | ||
>YAL068C CDS=1-363 | ||
ATGGTCAAATTAACTTCAATCGCCGCTGGTGTCGCTGCCATCGCTGCTACTGCTTCTGCAACCACCACTC | ||
TAGCTCAATCTGACGAAAGAGTCAACTTGGTGGAATTGGGTGTCTACGTCTCTGATATCAGAGCTCACTT | ||
AGCCCAATACTACATGTTCCAAGCCGCCCACCCAACTGAAACCTACCCAGTCGAAGTTGCTGAAGCCGTT | ||
TTCAACTACGGTGACTTCACCACCATGTTGACCGGTATTGCTCCAGACCAAGTGACCAGAATGATCACCG | ||
GTGTTCCATGGTACTCCAGCAGATTAAAGCCAGCCATCTCCAGTGCTCTATCCAAGGACGGTATCTACAC | ||
TATCGCAAACTAG | ||
>YAL067W-A CDS=1-228 | ||
ATGCCAATTATAGGGGTGCCGAGGTGCCTTATAAAACCCTTTTCTGTGCCTGTGACATTTCCTTTTTCGG | ||
TCAAAAAGAATATCCGAATTTTAGATTTGGACCCTCGTACAGAAGCTTATTGTCTAAGCCTGAATTCAGT | ||
CTGCTTTAAACGGCTTCCGCGGAGGAAATATTTCCATCTCTTGAATTCGTACAACATTAAACGTGTGTTG | ||
GGAGTCGTATACTGTTAG |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
@fastq1 | ||
ATGCATGCATGCATGCATGCATGCATGCAT | ||
GCATGCATGCATGCATGCATGCATGCATGC | ||
ATGCAT | ||
+ | ||
FFFA@@FFFFFFFFFFHHB:::@BFFFFGG | ||
HIHIIIIIIIIIIIIIIIIIIIIIIIFFFF | ||
8011<< | ||
@fastq2 | ||
ATGCATGCATGCAT | ||
GCATGCATGCATGC | ||
+ | ||
IIA94445EEII== | ||
=>IIIIIIIIICCC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
YAL069W:300-315 | ||
YAL068C:200-230 | ||
YAL067W-A:115-145 | ||
chr1:1-100 |