diff --git a/src/seqtk/seqtk_sample/config.vsh.yaml b/src/seqtk/seqtk_sample/config.vsh.yaml
new file mode 100644
--- /dev/null
+++ b/src/seqtk/seqtk_sample/config.vsh.yaml
@@ -0,0 +1,55 @@
+name: seqtk_sample
+namespace: seqtk
+description: Subsamples sequences from FASTA/Q files.
+keywords: [sample, FASTA, FASTQ]
+links:
+  repository: https://github.com/lh3/seqtk/tree/v1.4
+license: MIT
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --input
+        type: file
+        description: The input FASTA/Q file.
+        required: true
+
+  - name: Outputs
+    arguments:
+      - name: --output
+        type: file
+        description: The output FASTA/Q file.
+        required: true
+        direction: output
+
+  - name: Options
+    arguments:
+      - name: --seed
+        type: integer
+        description: Seed for random generator.
+        default: 42
+      - name: --fraction_number
+        type: double
+        description: Fraction or number of sequences to sample.
+        default: 0.1
+      # script.sh only passes -2 to seqtk when this is true.
+      - name: --two_pass_mode
+        type: boolean
+        description: Two-pass mode, twice as slow but with much reduced memory.
+        default: false
+
+resources:
+  - type: bash_script
+    path: script.sh
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - type: file
+    path: test_data
+
+engines:
+  - type: docker
+    image: quay.io/biocontainers/seqtk:1.4--he4a0461_2
+runners:
+  - type: executable
+  - type: nextflow
diff --git a/src/seqtk/seqtk_sample/help.txt b/src/seqtk/seqtk_sample/help.txt
new file mode 100644
--- /dev/null
+++ b/src/seqtk/seqtk_sample/help.txt
@@ -0,0 +1,7 @@
+```
+seqtk sample
+```
+Usage:   seqtk sample [-2] [-s seed=11] <in.fa> <frac>|<number>
+
+Options: -s INT       RNG seed [11]
+         -2           2-pass mode: twice as slow but with much reduced memory
diff --git a/src/seqtk/seqtk_sample/script.sh b/src/seqtk/seqtk_sample/script.sh
new file mode 100644
--- /dev/null
+++ b/src/seqtk/seqtk_sample/script.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+set -eo pipefail
+
+# The boolean argument is expected to arrive as the string "true" or "false"
+# (TODO confirm against the Viash version in use). Clear it unless it is "true"
+# so the ${var:+...} expansion below adds -2 only when two-pass mode is requested.
+if [ "$par_two_pass_mode" != "true" ]; then
+  unset par_two_pass_mode
+fi
+
+# Subsample the input; seqtk writes the sampled records to stdout.
+seqtk sample \
+  ${par_two_pass_mode:+-2} \
+  ${par_seed:+-s "$par_seed"} \
+  "$par_input" \
+  "$par_fraction_number" \
+  > "$par_output"
diff --git a/src/seqtk/seqtk_sample/test.sh b/src/seqtk/seqtk_sample/test.sh
new file mode 100644
--- /dev/null
+++ b/src/seqtk/seqtk_sample/test.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+set -e
+
+## VIASH START
+meta_executable="target/executable/seqtk/seqtk_sample"
+meta_resources_dir="src/seqtk/seqtk_sample"
+## VIASH END
+
+#########################################################################################
+# Single-end: sample from an uncompressed FASTQ file.
+mkdir seqtk_sample_se
+cd seqtk_sample_se
+
+echo ">> Run seqtk_sample on SE with fastq"
+"$meta_executable" \
+  --input "$meta_resources_dir/test_data/reads/a.fastq" \
+  --seed 42 \
+  --fraction_number 3 \
+  --output "sampled.fastq"
+
+echo ">> Check if output exists"
+if [ ! -f "sampled.fastq" ]; then
+  echo ">> sampled.fastq does not exist"
+  exit 1
+fi
+
+cat sampled.fastq
+
+#########################################################################################
+# Paired-end: sample both mates with the same seed and compare the read names.
+cd ..
+mkdir seqtk_sample_pe
+cd seqtk_sample_pe
+
+echo ">> Run seqtk_sample on PE with fastq.gz"
+"$meta_executable" \
+  --input "$meta_resources_dir/test_data/reads/a.1.fastq.gz" \
+  --seed 42 \
+  --fraction_number 3 \
+  --output "sampled_1.fastq"
+
+"$meta_executable" \
+  --input "$meta_resources_dir/test_data/reads/a.2.fastq.gz" \
+  --seed 42 \
+  --fraction_number 3 \
+  --output "sampled_2.fastq"
+
+echo ">> Check if output exists"
+if [ ! -f "sampled_1.fastq" ] || [ ! -f "sampled_2.fastq" ]; then
+  echo ">> One or both output files do not exist"
+  exit 1
+fi
+
+echo ">> Compare reads"
+# Take the first line of every 4-line FASTQ record (grep '^@' would also match
+# quality lines that start with '@'), then strip the mate suffix.
+headers1=$(awk 'NR % 4 == 1' sampled_1.fastq | sed -e 's/ 1$//' | sort)
+headers2=$(awk 'NR % 4 == 1' sampled_2.fastq | sed -e 's/ 2$//' | sort)
+
+# Fail only when the two mates do not contain the same set of read names.
+if ! diff <(echo "$headers1") <(echo "$headers2"); then
+  echo "Mismatch detected"
+  exit 1
+fi
+
+echo ">> All tests passed"
diff --git a/src/seqtk/seqtk_sample/test_data/reads/a.1.fastq.gz b/src/seqtk/seqtk_sample/test_data/reads/a.1.fastq.gz
new file mode 100644
index 00000000..97a72ce5
Binary files /dev/null and b/src/seqtk/seqtk_sample/test_data/reads/a.1.fastq.gz differ
diff --git a/src/seqtk/seqtk_sample/test_data/reads/a.2.fastq.gz b/src/seqtk/seqtk_sample/test_data/reads/a.2.fastq.gz
new file mode 100644
index 00000000..038bc976
Binary files /dev/null and b/src/seqtk/seqtk_sample/test_data/reads/a.2.fastq.gz differ
diff --git a/src/seqtk/seqtk_sample/test_data/reads/a.fastq b/src/seqtk/seqtk_sample/test_data/reads/a.fastq
new file mode 100644
index 00000000..42735560
--- /dev/null
+++ b/src/seqtk/seqtk_sample/test_data/reads/a.fastq
@@ -0,0 +1,4 @@
+@1
+ACGGCAT
++
+!!!!!!!
diff --git a/src/seqtk/seqtk_sample/test_data/reads/a.fastq.gz b/src/seqtk/seqtk_sample/test_data/reads/a.fastq.gz
new file mode 100644
index 00000000..0ae3f084
Binary files /dev/null and b/src/seqtk/seqtk_sample/test_data/reads/a.fastq.gz differ
diff --git a/src/seqtk/seqtk_sample/test_data/reads/id.list b/src/seqtk/seqtk_sample/test_data/reads/id.list
new file mode 100644
index 00000000..d00491fd
--- /dev/null
+++ b/src/seqtk/seqtk_sample/test_data/reads/id.list
@@ -0,0 +1 @@
+1
diff --git a/src/seqtk/seqtk_sample/test_data/script.sh b/src/seqtk/seqtk_sample/test_data/script.sh
new file mode 100755
--- /dev/null
+++ b/src/seqtk/seqtk_sample/test_data/script.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# Fetches the seqtk test data from the snakemake-wrappers repository.
+# Paths are relative, so run this from the repository root.
+set -e
+
+# clone repo
+if [ ! -d /tmp/snakemake-wrappers ]; then
+  git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers
+fi
+
+# copy test data
+cp -r /tmp/snakemake-wrappers/bio/seqtk/test/* src/seqtk/seqtk_sample/test_data
+
+# remove the copied Snakefile from the test data directory
+rm src/seqtk/seqtk_sample/test_data/Snakefile