-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* tests added * tests extended * changelog entry added * reorganized seqtk namespace + added seqtk subseq config and script * added subseq help.txt * revert to seqtk sample only * remove subseq * updated tests, added tags * Update two_pass_mode Co-authored-by: Robrecht Cannoodt <[email protected]> * author added to config --------- Co-authored-by: Robrecht Cannoodt <[email protected]>
- Loading branch information
1 parent
13c5439
commit e615d2a
Showing
12 changed files
with
207 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
name: Jakub Majercik | ||
info: | ||
links: | ||
email: [email protected] | ||
github: jakubmajercik | ||
linkedin: jakubmajercik | ||
organizations: | ||
- name: Data Intuitive | ||
href: https://www.data-intuitive.com | ||
role: Bioinformatics Engineer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
name: seqtk_sample | ||
namespace: seqtk | ||
description: Subsamples sequences from FASTA/Q files. | ||
keywords: [sample, FASTA, FASTQ] | ||
links: | ||
repository: https://github.com/lh3/seqtk/tree/v1.4 | ||
license: MIT | ||
authors: | ||
- __merge__: /src/_authors/jakub_majercik.yaml | ||
roles: [ author, maintainer ] | ||
|
||
argument_groups: | ||
- name: Inputs | ||
arguments: | ||
- name: --input | ||
type: file | ||
description: The input FASTA/Q file. | ||
required: true | ||
|
||
- name: Outputs | ||
arguments: | ||
- name: --output | ||
type: file | ||
description: The output FASTA/Q file. | ||
required: true | ||
direction: output | ||
|
||
- name: Options | ||
arguments: | ||
- name: --seed | ||
type: integer | ||
description: Seed for random generator. | ||
example: 42 | ||
- name: --fraction_number | ||
type: double | ||
description: Fraction (a value between 0 and 1) or absolute number of sequences to sample. | ||
required: true | ||
example: 0.1 | ||
- name: --two_pass_mode | ||
type: boolean_true | ||
description: Enable 2-pass mode, which is twice as slow but uses much less memory. | ||
|
||
resources: | ||
- type: bash_script | ||
path: script.sh | ||
test_resources: | ||
- type: bash_script | ||
path: test.sh | ||
- type: file | ||
path: ../test_data | ||
|
||
engines: | ||
- type: docker | ||
image: quay.io/biocontainers/seqtk:1.4--he4a0461_2 | ||
runners: | ||
- type: executable | ||
- type: nextflow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
``` | ||
seqtk_subseq | ||
``` | ||
Usage: seqtk subseq [options] <in.fa> <in.bed>|<name.list> | ||
Options: | ||
-t TAB delimited output | ||
-s strand aware | ||
-l INT sequence line length [0] | ||
Note: Use 'samtools faidx' if only a few regions are intended. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash

## VIASH START
## VIASH END

# Wrapper around `seqtk sample`.
#
# Reads:
#   par_input            input FASTA/Q file
#   par_fraction_number  fraction or number of sequences to keep
#   par_seed             optional seed for the random generator
#   par_two_pass_mode    optional flag enabling seqtk's 2-pass mode (-2)
# Writes:
#   par_output           subsampled sequences (seqtk's stdout)

set -eo pipefail

# `${var:+...}` emits the flag for ANY non-empty value, so a flag that arrives
# as the literal string "false" would still switch 2-pass mode on. Drop it
# before building the command line.
# NOTE(review): Viash is expected to pass unset boolean_true flags as "false";
# confirm against the Viash version in use.
if [[ "${par_two_pass_mode:-}" == "false" ]]; then
  unset par_two_pass_mode
fi

# The two optional expansions are intentionally unquoted on the outside so
# they vanish completely when the variable is unset or empty.
seqtk sample \
  ${par_two_pass_mode:+-2} \
  ${par_seed:+-s "$par_seed"} \
  "$par_input" \
  "$par_fraction_number" \
  > "$par_output"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
#!/bin/bash

# Test for the seqtk_sample component.
#
# Three scenarios are run, each in its own working directory:
#   1. single-end sampling of a fixed number of reads,
#   2. paired-end sampling of a fixed number of reads (same seed per mate),
#   3. paired-end sampling of a fraction of reads (same seed per mate).
# For the paired-end runs both mates must contain the same reads.

set -e

## VIASH START
meta_executable="target/executable/seqtk/seqtk_sample"
meta_resources_dir="src/seqtk"
## VIASH END

#######################################
# Print the number of FASTQ records in a file.
# Only the first line of every 4-line record is inspected, so quality strings
# that happen to start with '@' are not counted as headers.
# NOTE(review): assumes unwrapped, four-line FASTQ records; confirm for the
# seqtk version in the container.
# Arguments: $1 - path to a FASTQ file
# Outputs:   record count on stdout (0 for an empty file)
#######################################
count_fastq_records() {
  awk 'NR % 4 == 1 && /^@/ { n++ } END { print n + 0 }' "$1"
}

#######################################
# Print the sorted header lines of a FASTQ file with the mate suffix removed.
# Arguments: $1 - path to a FASTQ file
#            $2 - mate number whose trailing " <mate>" suffix is stripped
# Outputs:   one header per line on stdout
#######################################
sorted_fastq_headers() {
  awk 'NR % 4 == 1' "$1" | sed -e "s/ $2\$//" | sort
}

#######################################
# Fail the test unless sampled_1.fastq and sampled_2.fastq both exist in the
# current directory.
#######################################
require_paired_outputs() {
  if [ ! -f "sampled_1.fastq" ] || [ ! -f "sampled_2.fastq" ]; then
    echo ">> One or both output files do not exist"
    exit 1
  fi
}

#######################################
# Fail the test unless both mates in the current directory contain the same
# set of read headers.
#######################################
require_matching_mates() {
  diff <(sorted_fastq_headers sampled_1.fastq 1) \
       <(sorted_fastq_headers sampled_2.fastq 2) \
    || { echo "Mismatch detected"; exit 1; }
}

#########################################################################################
mkdir seqtk_sample_se
cd seqtk_sample_se

echo "> Run seqtk_sample on fastq SE"
"$meta_executable" \
  --input "$meta_resources_dir/test_data/reads/a.1.fastq.gz" \
  --seed 42 \
  --fraction_number 3 \
  --output "sampled.fastq"

echo ">> Check if output exists"
if [ ! -f "sampled.fastq" ]; then
  echo ">> sampled.fastq does not exist"
  exit 1
fi

echo ">> Count number of samples"
num_samples=$(count_fastq_records sampled.fastq)
if [ "$num_samples" -ne 3 ]; then
  echo ">> sampled.fastq does not contain 3 samples"
  exit 1
fi

#########################################################################################
cd ..
mkdir seqtk_sample_pe_number
cd seqtk_sample_pe_number

echo ">> Run seqtk_sample on fastq.gz PE with number of reads"
# Using the same seed for both mates keeps the pairs in sync.
"$meta_executable" \
  --input "$meta_resources_dir/test_data/reads/a.1.fastq.gz" \
  --seed 42 \
  --fraction_number 3 \
  --output "sampled_1.fastq"

"$meta_executable" \
  --input "$meta_resources_dir/test_data/reads/a.2.fastq.gz" \
  --seed 42 \
  --fraction_number 3 \
  --output "sampled_2.fastq"

echo ">> Check if output exists"
require_paired_outputs

echo ">> Compare reads"
require_matching_mates

echo ">> Count number of samples"
num_headers=$(count_fastq_records sampled_1.fastq)
if [ "$num_headers" -ne 3 ]; then
  echo ">> sampled_1.fastq does not contain 3 headers"
  exit 1
fi

#########################################################################################
cd ..
mkdir seqtk_sample_pe_fraction
cd seqtk_sample_pe_fraction

echo ">> Run seqtk_sample on fastq.gz PE with fraction of reads"
"$meta_executable" \
  --input "$meta_resources_dir/test_data/reads/a.1.fastq.gz" \
  --seed 42 \
  --fraction_number 0.5 \
  --output "sampled_1.fastq"

"$meta_executable" \
  --input "$meta_resources_dir/test_data/reads/a.2.fastq.gz" \
  --seed 42 \
  --fraction_number 0.5 \
  --output "sampled_2.fastq"

echo ">> Check if output exists"
require_paired_outputs

echo ">> Compare reads"
require_matching_mates
|
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
@1 | ||
ACGGCAT | ||
+ | ||
!!!!!!! |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/bin/bash

# Fetches the seqtk test fixtures from the snakemake-wrappers repository and
# copies them into src/seqtk/test_data. The destination path is relative, so
# run this from the repository root.

# Stop at the first failing step so a failed clone is not followed by a copy
# from a missing or partial checkout.
set -eo pipefail

# clone repo (skipped when a checkout is already present)
if [ ! -d /tmp/snakemake-wrappers ]; then
  git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers
fi

# copy test data
mkdir -p src/seqtk/test_data
cp -r /tmp/snakemake-wrappers/bio/seqtk/test/* src/seqtk/test_data

# remove the copied Snakefile; -f keeps re-runs from failing if it is already gone
rm -f src/seqtk/test_data/Snakefile