forked from viash-hub/biobox
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
three rsem components initial commit
- Loading branch information
1 parent
1679c59
commit dc275da
Showing
21 changed files
with
4,634 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
name: "rsem_calculate_expression"
namespace: "rsem"
description: |
  Calculate expression with RSEM.
keywords: ["Transcriptome", "Index"]
links:
  homepage: https://deweylab.github.io/RSEM/
  documentation: https://deweylab.github.io/RSEM/rsem-calculate-expression.html
  repository: https://github.com/deweylab/RSEM
references:
  doi: https://doi.org/10.1186/1471-2105-12-323
license: GPL-3.0


argument_groups:
  - name: "Input"
    arguments:
      - name: "--id"
        type: string
        description: Sample ID.
      - name: "--strandedness"
        type: string
        description: Sample strand-specificity. Must be one of unstranded, forward, reverse.
        choices: [forward, reverse, unstranded]
      - name: "--paired"
        type: boolean
        description: Paired-end reads or not?
      - name: "--input"
        type: file
        description: Input reads for quantification.
        multiple: true
      - name: "--index"
        type: file
        description: RSEM index.
      - name: "--extra_args"
        type: string
        description: Extra rsem-calculate-expression arguments in addition to the examples.
      # The script appends its version record to this file when it exists.
      - name: "--versions"
        type: file
        must_exist: false

  - name: "Output"
    arguments:
      - name: "--counts_gene"
        type: file
        description: Expression counts on gene level
        example: $id.genes.results
        direction: output
      - name: "--counts_transcripts"
        type: file
        description: Expression counts on transcript level
        example: $id.isoforms.results
        direction: output
      - name: "--stat"
        type: file
        description: RSEM statistics
        example: $id.stat
        direction: output
      - name: "--logs"
        type: file
        description: RSEM logs
        example: $id.log
        direction: output
      - name: "--bam_star"
        type: file
        description: BAM file generated by STAR (optional)
        example: $id.STAR.genome.bam
        direction: output
      - name: "--bam_genome"
        type: file
        description: Genome BAM file (optional)
        example: $id.genome.bam
        direction: output
      - name: "--bam_transcript"
        type: file
        description: Transcript BAM file (optional)
        example: $id.transcript.bam
        direction: output
      # Declared because the script writes its version record to this path.
      - name: "--updated_versions"
        type: file
        default: versions.yml
        direction: output

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz
  - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz
  - path: /testData/minimal_test/reference/rsem.tar.gz

# TODO: Install bowtie/bowtie2
engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: docker
        # ca-certificates is installed explicitly (it is only a "recommended"
        # package of wget) so the source archives are fetched with TLS
        # certificate verification enabled.
        run: |
          apt-get update && \
          apt-get install -y --no-install-recommends build-essential ca-certificates gcc g++ make wget zlib1g-dev unzip && \
          apt-get clean && \
          wget https://github.com/alexdobin/STAR/archive/refs/tags/2.7.11a.zip && \
          unzip 2.7.11a.zip && \
          cp STAR-2.7.11a/bin/Linux_x86_64_static/STAR /usr/local/bin && \
          cd && \
          wget https://github.com/deweylab/RSEM/archive/refs/tags/v1.3.3.zip && \
          unzip v1.3.3.zip && \
          cd RSEM-1.3.3 && \
          make && \
          make install
runners:
  - type: executable
  - type: nextflow
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#!/bin/bash

# Runs rsem-calculate-expression for a single sample.
#
# All settings are read from variables that are expected to be set by the
# caller: par_id, par_strandedness, par_paired, par_input, par_index,
# par_extra_args, the par_* output paths, par_versions, par_updated_versions,
# meta_cpus, meta_temp_dir, meta_resources_dir and meta_functionality_name.

set -eo pipefail

# Create the scratch directory before installing the trap, so the cleanup
# handler never runs with an undefined path.
tmpdir=$(mktemp -d "${meta_temp_dir:-/tmp}/${meta_functionality_name:-rsem_calculate_expression}-XXXXXXXX")
function clean_up {
  rm -rf -- "$tmpdir"
}
trap clean_up EXIT

# Moves an output written by RSEM to the path requested by the caller.
# Arguments: $1 - path RSEM wrote to
#            $2 - requested destination (may be empty)
# Does nothing when no destination was requested, when the output was not
# produced, or when both paths already refer to the same file.
function relocate_output {
  local produced=$1 requested=$2
  if [ -z "$requested" ] || [ ! -e "$produced" ]; then
    return 0
  fi
  if [ "$produced" -ef "$requested" ]; then
    return 0
  fi
  mkdir -p -- "$(dirname -- "$requested")"
  mv -- "$produced" "$requested"
}

# Build the option list as an array so that empty values simply disappear
# and values are never re-split.
rsem_args=()
if [ -n "$meta_cpus" ]; then
  rsem_args+=(--num-threads "$meta_cpus")
fi
case "$par_strandedness" in
  forward | reverse)
    rsem_args+=(--strandedness "$par_strandedness")
    ;;
esac
# The flag must only be passed for an explicit "true"; a plain non-empty
# check would also enable it for the value "false".
if [ "$par_paired" == "true" ]; then
  rsem_args+=(--paired-end)
fi

# Extra arguments are a single string that is deliberately split on whitespace.
read -ra extra_args <<< "$par_extra_args"

# Accept both ',' and ';' as separators between the read files.
IFS=",;" read -ra input <<< "$par_input"

# The RSEM reference name is the path of the index's .grp file without the
# extension. Search the directory given via --index and only fall back to
# the resources directory when no index was passed.
index_dir=${par_index:-$meta_resources_dir}
grp_file=$(find -L "$index_dir" -name '*.grp' -print -quit)
if [ -z "$grp_file" ]; then
  echo "No RSEM index (*.grp) found in '$index_dir'." >&2
  exit 1
fi
INDEX=${grp_file%.grp}

rsem-calculate-expression \
  "${rsem_args[@]}" \
  "${extra_args[@]}" \
  "${input[@]}" \
  "$INDEX" \
  "$par_id"

# RSEM names its outputs after the sample ID; hand them over at the
# requested output paths.
relocate_output "$par_id.genes.results" "$par_counts_gene"
relocate_output "$par_id.isoforms.results" "$par_counts_transcripts"
relocate_output "$par_id.stat" "$par_stat"
relocate_output "$par_id.log" "$par_logs"
relocate_output "$par_id.STAR.genome.bam" "$par_bam_star"
relocate_output "$par_id.genome.bam" "$par_bam_genome"
relocate_output "$par_id.transcript.bam" "$par_bam_transcript"

# Version
# Only record the version when a destination was provided; redirecting to an
# empty path would fail the whole run after RSEM already succeeded.
if [ -n "$par_updated_versions" ]; then
  text="${meta_functionality_name}:
    rsem: $(rsem-calculate-expression --version | sed -e 's/Current version: RSEM v//g')"
  if [ -e "$par_versions" ]; then
    echo "$text" >> "$par_versions"
    mv "$par_versions" "$par_updated_versions"
  else
    echo "$text" > "$par_updated_versions"
  fi
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#!/bin/bash

# Component test: runs the executable on the bundled reads and checks that
# the main outputs were written.

# Stop as soon as unpacking the reference or running the component fails.
set -eo pipefail

# Fails the test unless the given path exists and is non-empty.
# Arguments: $1 - path to check (file or directory)
#            $2 - label used in the failure message
check_output() {
  local path=$1 label=$2
  if [ ! -e "$path" ]; then
    echo "$label does not exist!"
    exit 1
  fi
  if [ -d "$path" ]; then
    # A directory counts as empty when it has no entries.
    if [ -z "$(ls -A -- "$path")" ]; then
      echo "$label is empty!"
      exit 1
    fi
  elif [ ! -s "$path" ]; then
    echo "$label is empty!"
    exit 1
  fi
}

echo ">>> Testing $meta_functionality_name"

tar -xavf "$meta_resources_dir/rsem.tar.gz"

echo ">>> Calculating expression"
# --input is repeated once per read file so the test does not depend on a
# particular multi-value separator.
"$meta_executable" \
  --id WT_REP1 \
  --strandedness reverse \
  --paired true \
  --input "$meta_resources_dir/SRR6357070_1.fastq.gz" \
  --input "$meta_resources_dir/SRR6357070_2.fastq.gz" \
  --index rsem \
  --extra_args "--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1" \
  --counts_gene WT_REP1.genes.results \
  --counts_transcripts WT_REP1.isoforms.results \
  --stat WT_REP1.stat \
  --logs WT_REP1.log \
  --bam_star WT_REP1.STAR.genome.bam \
  --bam_genome WT_REP1.genome.bam \
  --bam_transcript WT_REP1.transcript.bam

echo ">>> Checking whether output exists"
check_output "WT_REP1.genes.results" "Gene level expression counts file"
check_output "WT_REP1.isoforms.results" "Transcript level expression counts file"
# RSEM writes its statistics as a folder, so this check must not require a
# regular file.
check_output "WT_REP1.stat" "Stats file"
check_output "WT_REP1.log" "Log file"

echo "All tests succeeded!"
exit 0
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/bin/bash

# Downloads the paired-end test reads into the current directory.
# Abort on the first failed download instead of silently leaving an
# incomplete test data set behind.
set -euo pipefail

wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357070_1.fastq.gz
wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357070_2.fastq.gz
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
name: "rsem_merge_counts"
namespace: "rsem"
info:
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths: [modules/local/rsem_merge_counts/main.nf]
    last_sha: 311279532694ce7520164ce4d65a388c0cd11f60

description: |
  Merge the transcript quantification results obtained from rsem calculate-expression across all samples.
argument_groups:
  - name: "Input"
    arguments:
      # One results file per sample; the script loops over all of them.
      - name: "--counts_gene"
        type: file
        description: Expression counts on gene level (genes)
        multiple: true
      - name: "--counts_transcripts"
        type: file
        description: Expression counts on transcript level (isoforms)
        multiple: true
      - name: "--versions"
        type: file
        must_exist: false

  - name: "Output"
    arguments:
      - name: "--merged_gene_counts"
        type: file
        description: File containing gene counts across all samples.
        default: rsem.merged.gene_counts.tsv
        direction: output
      - name: "--merged_gene_tpm"
        type: file
        description: File containing gene TPM across all samples.
        default: rsem.merged.gene_tpm.tsv
        direction: output
      - name: "--merged_transcript_counts"
        type: file
        description: File containing transcript counts across all samples.
        default: rsem.merged.transcript_counts.tsv
        direction: output
      - name: "--merged_transcript_tpm"
        type: file
        description: File containing transcript TPM across all samples.
        default: rsem.merged.transcript_tpm.tsv
        direction: output
      - name: "--updated_versions"
        type: file
        default: versions.yml
        direction: output

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  # - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz
  # - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz

engines:
  - type: docker
    image: ubuntu:22.04

runners:
  - type: executable
  - type: nextflow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
```bash | ||
rsem-merge-counts --help | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#!/bin/bash

# Merges per-sample RSEM results into one table per measure.
#
# For the gene-level and the transcript-level results it writes a counts
# table (column 5 of every input) and a TPM table (column 6), each prefixed
# with the two identifier columns of the first input file.
#
# Settings are read from variables expected to be set by the caller:
# par_counts_gene, par_counts_transcripts, par_merged_gene_counts,
# par_merged_gene_tpm, par_merged_transcript_counts,
# par_merged_transcript_tpm, par_versions, par_updated_versions,
# meta_temp_dir and meta_functionality_name.

set -eo pipefail

# Work in a private scratch directory so files left over from an earlier
# run can never leak into the merged tables.
workdir=$(mktemp -d "${meta_temp_dir:-/tmp}/${meta_functionality_name:-rsem_merge_counts}-XXXXXXXX")
trap 'rm -rf -- "$workdir"' EXIT

# Prints one results file per line for an input value.
# Arguments: $1 - list of paths separated by whitespace, ';' or ','
# Directories are replaced by the regular files they contain.
collect_inputs() {
  local raw=$1 entry file
  local IFS=$' \t\n;,'
  # Unquoted on purpose: split on the separators above and expand globs.
  for entry in $raw; do
    if [ -d "$entry" ]; then
      for file in "$entry"/*; do
        if [ -f "$file" ]; then
          printf '%s\n' "$file"
        fi
      done
    else
      printf '%s\n' "$entry"
    fi
  done
}

# Splits every results file into one counts and one TPM column file.
# Arguments: $1 - level name, used as sub-directory of the scratch directory
#            $2 - file name suffix to strip to obtain the sample name
#            $3 - file that receives the identifier columns
#            $4.. - results files
extract_columns() {
  local level=$1 suffix=$2 ids_file=$3
  shift 3
  local outdir="$workdir/$level" file sample

  if [ "$#" -eq 0 ]; then
    echo "No $level results files were provided." >&2
    exit 1
  fi

  mkdir -p "$outdir"
  # The identifier columns are taken from the first file.
  cut -f 1,2 "$1" > "$ids_file"

  for file in "$@"; do
    sample=$(basename -- "$file")
    sample=${sample%"$suffix"}
    # Each column file starts with the sample name as header, followed by
    # the values without the original header line.
    {
      echo "$sample"
      cut -f 5 "$file" | tail -n +2
    } > "$outdir/$sample.counts.txt"
    {
      echo "$sample"
      cut -f 6 "$file" | tail -n +2
    } > "$outdir/$sample.tpm.txt"
  done
}

mapfile -t gene_files < <(collect_inputs "$par_counts_gene")
mapfile -t transcript_files < <(collect_inputs "$par_counts_transcripts")

extract_columns genes .genes.results "$workdir/gene_ids.txt" "${gene_files[@]}"
extract_columns isoforms .isoforms.results "$workdir/transcript_ids.txt" "${transcript_files[@]}"

paste "$workdir/gene_ids.txt" "$workdir"/genes/*.counts.txt > "$par_merged_gene_counts"
paste "$workdir/gene_ids.txt" "$workdir"/genes/*.tpm.txt > "$par_merged_gene_tpm"
paste "$workdir/transcript_ids.txt" "$workdir"/isoforms/*.counts.txt > "$par_merged_transcript_counts"
paste "$workdir/transcript_ids.txt" "$workdir"/isoforms/*.tpm.txt > "$par_merged_transcript_tpm"

# Version
text="${meta_functionality_name}:
    sed: $(echo $(sed --version 2>&1) | grep -oP 'sed \(GNU sed\) \K\d+\.\d+')"
if [ -e "$par_versions" ]; then
  echo "$text" >> "$par_versions"
  mv "$par_versions" "$par_updated_versions"
else
  echo "$text" > "$par_updated_versions"
fi
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/bin/bash

# Downloads the paired-end test reads and the RSEM reference archive into
# the current directory.
# Abort on the first failed download instead of silently leaving an
# incomplete test data set behind.
set -euo pipefail

wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357070_1.fastq.gz
wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357070_2.fastq.gz
wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/reference/rsem.tar.gz
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
name: "rsem_prepare_reference"
namespace: "rsem"
info:
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths: [modules/nf-core/rsem/preparereference/main.nf, modules/nf-core/rsem/preparereference/meta.yml]
    last_sha: 92b2a7857de1dda9d1c19a088941fc81e2976ff7

description: |
  Prepare a reference genome for RSEM.
argument_groups:
  - name: "Input"
    arguments:
      - name: "--fasta"
        type: file
        description: Genome fasta file
      - name: "--gtf"
        type: file
        description: GTF file
      - name: "--star"
        type: boolean
      - name: "--versions"
        type: file
        must_exist: false

  - name: "Output"
    arguments:
      - name: "--rsem"
        type: file
        direction: output
        description: RSEM index directory.
      - name: "--transcript_fasta"
        type: file
        direction: output
        description: Fasta file of transcripts
      - name: "--updated_versions"
        type: file
        default: versions.yml
        direction: output

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/minimal_test/reference/genome.fasta
  - path: /testData/minimal_test/reference/genes.gtf.gz

engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: docker
        # ca-certificates is installed explicitly (it is only a "recommended"
        # package of wget) so the source archives are fetched with TLS
        # certificate verification enabled.
        run: |
          apt-get update && \
          apt-get install -y --no-install-recommends build-essential ca-certificates gcc g++ make wget zlib1g-dev unzip && \
          apt-get clean && \
          wget https://github.com/alexdobin/STAR/archive/refs/tags/2.7.11a.zip && \
          unzip 2.7.11a.zip && \
          cp STAR-2.7.11a/bin/Linux_x86_64_static/STAR /usr/local/bin && \
          cd && \
          wget https://github.com/deweylab/RSEM/archive/refs/tags/v1.3.3.zip && \
          unzip v1.3.3.zip && \
          cd RSEM-1.3.3 && \
          make && \
          make install
runners:
  - type: executable
  - type: nextflow
Oops, something went wrong.