Skip to content

Commit

Permalink
complete component
Browse files Browse the repository at this point in the history
  • Loading branch information
emmarousseau committed Sep 15, 2024
1 parent 3af66f8 commit d07a42a
Show file tree
Hide file tree
Showing 9 changed files with 225 additions and 0 deletions.
103 changes: 103 additions & 0 deletions src/kallisto/kallisto_quant/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
name: kallisto_quant
namespace: kallisto
description: |
Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.
keywords: [kallisto, quant, pseudoalignment]
links:
homepage: https://pachterlab.github.io/kallisto/about
documentation: https://pachterlab.github.io/kallisto/manual
repository: https://github.com/pachterlab/kallisto
issue_tracker: https://github.com/pachterlab/kallisto/issues
references:
doi: https://doi.org/10.1038/nbt.3519
license: BSD 2-Clause License

argument_groups:
- name: "Input"
arguments:
- name: "--input"
type: file
description: List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively.
direction: "input"
multiple: true
required: true
- name: "--index"
alternatives: ["-i"]
type: file
description: Kallisto genome index.
must_exist: true
required: true

- name: "Output"
arguments:
- name: "--output_dir"
alternatives: ["-o"]
type: string
description: Directory to write output to.
required: true

- name: "Options"
arguments:
- name: "--single"
type: boolean_true
description: Single end mode.
- name: "--single_overhang"
type: boolean_true
description: Include reads where unobserved rest of fragment is predicted to lie outside a transcript.
- name: "--fr_stranded"
type: boolean_true
description: Strand specific reads, first read forward.
- name: "--rf_stranded"
type: boolean_true
description: Strand specific reads, first read reverse.
- name: "--fragment_length"
alternatives: ["-l"]
type: double
description: The estimated average fragment length.
- name: "--sd"
alternatives: ["-s"]
type: double
description: |
The estimated standard deviation of the fragment length (default: -l, -s values are estimated
from paired end data, but are required when using --single).
- name: "--plaintext"
type: boolean_true
description: Output plaintext instead of HDF5.
- name: "--bootstrap_samples"
alternatives: ["-b"]
type: integer
description: |
Number of bootstrap samples to draw. Default: '0'
example: 0
- name: "--seed"
type: integer
description: |
Random seed for bootstrap. Default: '42'
example: 42


resources:
- type: bash_script
path: script.sh

test_resources:
- type: bash_script
path: test.sh
- type: file
path: test_data

engines:
- type: docker
image: ubuntu:22.04
setup:
- type: docker
run: |
apt-get update && \
apt-get install -y --no-install-recommends wget && \
wget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \
tar -xzf kallisto_linux-v0.50.1.tar.gz && \
mv kallisto/kallisto /usr/local/bin/ && \
echo "kallisto: $(kallisto version | sed 's/kallisto, version //')" > /var/software_versions.txt
runners:
- type: executable
- type: nextflow
46 changes: 46 additions & 0 deletions src/kallisto/kallisto_quant/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash

## VIASH START
## VIASH END

set -eo pipefail

unset_if_false=( par_single par_single_overhang par_rf_stranded par_fr_stranded par_plaintext )

for var in "${unset_if_false[@]}"; do
temp_var="${!var}"
[[ "$temp_var" == "false" ]] && unset $var
done

IFS=";" read -ra input <<< $par_input

# Check if par_single is not set and ensure even number of input files
if [ -z "$par_single" ]; then
if [ $((${#input[@]} % 2)) -ne 0 ]; then
echo "Error: When running in paired-end mode, the number of input files must be even."
echo "Number of input files provided: ${#input[@]}"
exit 1
fi
fi


mkdir -p $par_output_dir


kallisto quant \
${meta_cpus:+--threads $meta_cpus} \
-i $par_index \
${par_gtf:+--gtf "${par_gtf}"} \
${par_single:+--single} \
${par_single_overhang:+--single-overhang} \
${par_fr_stranded:+--fr-stranded} \
${par_rf_stranded:+--rf-stranded} \
${par_plaintext:+--plaintext} \
${par_bootstrap_samples:+--bootstrap-samples "${par_bootstrap_samples}"} \
${par_fragment_length:+--fragment-length "${par_fragment_length}"} \
${par_sd:+--sd "${par_sd}"} \
${par_seed:+--seed "${par_seed}"} \
-o $par_output_dir \
${input[*]}


53 changes: 53 additions & 0 deletions src/kallisto/kallisto_quant/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash

echo ">>> Testing $meta_functionality_name"

echo ">>> Test 1: Testing for paired-end reads"
"$meta_executable" \
--index "$meta_resources_dir/test_data/index/transcriptome.idx" \
--rf_stranded \
--output_dir . \
--input "$meta_resources_dir/test_data/reads/A_R1.fastq;$meta_resources_dir/test_data/reads/A_R2.fastq"

echo ">>> Checking whether output exists"
[ ! -f "run_info.json" ] && echo "run_info.json does not exist!" && exit 1
[ ! -s "run_info.json" ] && echo "run_info.json is empty!" && exit 1
[ ! -f "abundance.tsv" ] && echo "abundance.tsv does not exist!" && exit 1
[ ! -s "abundance.tsv" ] && echo "abundance.tsv is empty!" && exit 1
[ ! -f "abundance.h5" ] && echo "abundance.h5 does not exist!" && exit 1
[ ! -s "abundance.h5" ] && echo "abundance.h5 is empty!" && exit 1

echo ">>> Checking if output is correct"
diff "abundance.tsv" "$meta_resources_dir/test_data/abundance_1.tsv" || { echo "abundance.tsv is not correct"; exit 1; }

rm -rf abundance.tsv abundance.h5 run_info.json

################################################################################

echo ">>> Test 2: Testing for single-end reads"
"$meta_executable" \
--index "$meta_resources_dir/test_data/index/transcriptome.idx" \
--rf_stranded \
--output_dir . \
--single \
--input "$meta_resources_dir/test_data/reads/A_R1.fastq" \
--fragment_length 101 \
--sd 50

echo ">>> Checking whether output exists"
[ ! -f "run_info.json" ] && echo "run_info.json does not exist!" && exit 1
[ ! -s "run_info.json" ] && echo "run_info.json is empty!" && exit 1
[ ! -f "abundance.tsv" ] && echo "abundance.tsv does not exist!" && exit 1
[ ! -s "abundance.tsv" ] && echo "abundance.tsv is empty!" && exit 1
[ ! -f "abundance.h5" ] && echo "abundance.h5 does not exist!" && exit 1
[ ! -s "abundance.h5" ] && echo "abundance.h5 is empty!" && exit 1

echo ">>> Checking if output is correct"
diff "abundance.tsv" "$meta_resources_dir/test_data/abundance_2.tsv" || { echo "abundance.tsv is not correct"; exit 1; }

rm -rf abundance.tsv abundance.h5 run_info.json

################################################################################

echo "All tests succeeded!"
exit 0
2 changes: 2 additions & 0 deletions src/kallisto/kallisto_quant/test_data/abundance_1.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
target_id length eff_length est_counts tpm
Sheila 35 36 0 -nan
2 changes: 2 additions & 0 deletions src/kallisto/kallisto_quant/test_data/abundance_2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
target_id length eff_length est_counts tpm
Sheila 35 15.0373 0 -nan
Binary file not shown.
4 changes: 4 additions & 0 deletions src/kallisto/kallisto_quant/test_data/reads/A_R1.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@1
GCTAGCTCAGAAAAAAAAAATCGTCGCGTGCGCGT
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
4 changes: 4 additions & 0 deletions src/kallisto/kallisto_quant/test_data/reads/A_R2.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@1
GCTAGCTCAGAAAAAAAAAATCGTCGCGTGCGCGT
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
11 changes: 11 additions & 0 deletions src/kallisto/kallisto_quant/test_data/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

# clone repo
if [ ! -d /tmp/snakemake-wrappers ]; then
git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers
fi

# copy test data
cp -r /tmp/snakemake-wrappers/bio/kallisto/quant/test/* src/kallisto/kallisto_quant/test_data

rm src/kallisto/kallisto_quant/test_data/Snakefile

0 comments on commit d07a42a

Please sign in to comment.