-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rsem-calculate-expression #93
Changes from 20 commits
38f586b
271108c
2c26968
5ea8c78
897cd89
ea0383c
44b3fcc
6cc4f94
c9613d1
1679c59
dc275da
9745e4a
9f9602a
84fe94d
7a8772f
aef65a1
2918fb4
c584b93
e055c97
d23ee7b
bd1075e
de23085
601518c
cf46beb
41598f0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#!/bin/bash | ||
|
||
## VIASH START | ||
## VIASH END | ||
|
||
set -eo pipefail | ||
|
||
function clean_up { | ||
rm -rf "$tmpdir" | ||
} | ||
trap clean_up EXIT | ||
|
||
tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX") | ||
|
||
if [ "$par_strandedness" == 'forward' ]; then | ||
strandedness='--strandedness forward' | ||
elif [ "$par_strandedness" == 'reverse' ]; then | ||
strandedness="--strandedness reverse" | ||
else | ||
strandedness='' | ||
fi | ||
|
||
IFS=";" read -ra input <<< $par_input | ||
|
||
INDEX=$(find -L $meta_resources_dir/$par_index -name "*.grp" | sed 's/\.grp$//') | ||
|
||
unset_if_false=( par_paired par_quiet par_no_bam_output par_sampling_for_bam par_no_qualities | ||
par_alignments par_bowtie2 par_star par_hisat2_hca par_append_names | ||
par_single_cell_prior par_calc_pme par_calc_ci par_phred64_quals | ||
par_solexa_quals par_star_gzipped_read_file par_star_bzipped_read_file | ||
par_star_output_genome_bam par_estimate_rspd par_keep_intermediate_files | ||
par_time par_run_pRSEM par_cap_stacked_chipseq_reads par_sort_bam_by_read_name ) | ||
|
||
for par in ${unset_if_false[@]}; do | ||
test_val="${!par}" | ||
[[ "$test_val" == "false" ]] && unset $par | ||
done | ||
|
||
rsem-calculate-expression \ | ||
${par_quiet:+-q} \ | ||
${par_no_bam_output:+--no-bam-output} \ | ||
${par_sampling_for_bam:+--sampling-for-bam} \ | ||
${par_no_qualities:+--no-qualities} \ | ||
${par_alignments:+--alignments} \ | ||
${par_bowtie2:+--bowtie2} \ | ||
${par_star:+--star} \ | ||
${par_hisat2_hca:+--hisat2-hca} \ | ||
${par_append_names:+--append-names} \ | ||
${par_single_cell_prior:+--single-cell-prior} \ | ||
${par_calc_pme:+--calc-pme} \ | ||
${par_calc_ci:+--calc-ci} \ | ||
${par_phred64_quals:+--phred64-quals} \ | ||
${par_solexa_quals:+--solexa-quals} \ | ||
${par_star_gzipped_read_file:+--star-gzipped-read-file} \ | ||
${par_star_bzipped_read_file:+--star-bzipped-read-file} \ | ||
${par_star_output_genome_bam:+--star-output-genome-bam} \ | ||
${par_estimate_rspd:+--estimate-rspd} \ | ||
${par_keep_intermediate_files:+--keep-intermediate-files} \ | ||
${par_time:+--time} \ | ||
${par_run_pRSEM:+--run-pRSEM} \ | ||
${par_cap_stacked_chipseq_reads:+--cap-stacked-chipseq-reads} \ | ||
${par_sort_bam_by_read_name:+--sort-bam-by-read-name} \ | ||
${par_counts_gene:+--counts-gene "$par_counts_gene"} \ | ||
${par_counts_transcripts:+--counts-transcripts "$par_counts_transcripts"} \ | ||
${par_stat:+--stat "$par_stat"} \ | ||
${par_bam_star:+--bam-star "$par_bam_star"} \ | ||
${par_bam_genome:+--bam-genome "$par_bam_genome"} \ | ||
${par_bam_transcript:+--bam-transcript "$par_bam_transcript"} \ | ||
${par_fai:+--fai "$par_fai"} \ | ||
${par_seed:+--seed "$par_seed"} \ | ||
${par_seed_length:+--seed-length "$par_seed_length"} \ | ||
${par_bowtie_n:+--bowtie-n "$par_bowtie_n"} \ | ||
${par_bowtie_e:+--bowtie-e "$par_bowtie_e"} \ | ||
${par_bowtie_m:+--bowtie-m "$par_bowtie_m"} \ | ||
${par_bowtie_chunkmbs:+--bowtie-chunkmbs "$par_bowtie_chunkmbs"} \ | ||
${par_bowtie2_mismatch_rate:+--bowtie2-mismatch-rate "$par_bowtie2_mismatch_rate"} \ | ||
${par_bowtie2_k:+--bowtie2-k "$par_bowtie2_k"} \ | ||
${par_bowtie2_sensitivity_level:+--bowtie2-sensitivity-level "$par_bowtie2_sensitivity_level"} \ | ||
${par_tag:+--tag "$par_tag"} \ | ||
${par_fragment_length_min:+--fragment-length-min "$par_fragment_length_min"} \ | ||
${par_fragment_length_max:+--fragment-length-max "$par_fragment_length_max"} \ | ||
${par_fragment_length_mean:+--fragment-length-mean "$par_fragment_length_mean"} \ | ||
${par_fragment_length_sd:+--fragment-length-sd "$par_fragment_length_sd"} \ | ||
${par_num_rspd_bins:+--num-rspd-bins "$par_num_rspd_bins"} \ | ||
${par_gibbs_burnin:+--gibbs-burnin "$par_gibbs_burnin"} \ | ||
${par_gibbs_number_of_samples:+--gibbs-number-of-samples "$par_gibbs_number_of_samples"} \ | ||
${par_gibbs_sampling_gap:+--gibbs-sampling-gap "$par_gibbs_sampling_gap"} \ | ||
${par_ci_credibility_level:+--ci-credibility-level "$par_ci_credibility_level"} \ | ||
${par_ci_number_of_samples_per_count_vector:+--ci-number-of-samples-per-count-vector "$par_ci_number_of_samples_per_count_vector"} \ | ||
${par_temporary_folder:+--temporary-folder "$par_temporary_folder"} \ | ||
${par_chipseq_peak_file:+--chipseq-peak-file "$par_chipseq_peak_file"} \ | ||
${par_chipseq_target_read_files:+--chipseq-target-read-files "$par_chipseq_target_read_files"} \ | ||
${par_chipseq_control_read_files:+--chipseq-control-read-files "$par_chipseq_control_read_files"} \ | ||
${par_chipseq_read_files_multi_targets:+--chipseq-read-files-multi-targets "$par_chipseq_read_files_multi_targets"} \ | ||
${par_chipseq_bed_files_multi_targets:+--chipseq-bed-files-multi-targets "$par_chipseq_bed_files_multi_targets"} \ | ||
${par_n_max_stacked_chipseq_reads:+--n-max-stacked-chipseq-reads "$par_n_max_stacked_chipseq_reads"} \ | ||
${par_partition_model:+--partition-model "$par_partition_model"} \ | ||
$strandedness \ | ||
${par_paired:+--paired-end} \ | ||
${input[*]} \ | ||
$INDEX \ | ||
$par_id | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
#!/bin/bash | ||
|
||
echo ">>> Testing $meta_executable" | ||
|
||
test_dir="${meta_resources_dir}/test_data" | ||
|
||
# wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/rsem.tar.gz | ||
# gunzip -k rsem.tar.gz | ||
# tar -xf rsem.tar | ||
# mv $test_dir/rsem $meta_resources_dir | ||
|
||
echo "> Prepare test data" | ||
|
||
cat > reads_R1.fastq <<'EOF' | ||
@SEQ_ID1 | ||
ACGCTGCCTCATAAGCCTCACACAT | ||
+ | ||
IIIIIIIIIIIIIIIIIIIIIIIII | ||
@SEQ_ID2 | ||
ACCCGCAAGATTAGGCTCCGTACAC | ||
+ | ||
!!!!!!!!!!!!!!!!!!!!!!!!! | ||
EOF | ||
|
||
cat > reads_R2.fastq <<'EOF' | ||
@SEQ_ID1 | ||
ATGTGTGAGGCTTATGAGGCAGCGT | ||
+ | ||
IIIIIIIIIIIIIIIIIIIIIIIII | ||
@SEQ_ID2 | ||
GTGTACGGAGCCTAATCTTGCAGGG | ||
+ | ||
!!!!!!!!!!!!!!!!!!!!!!!!! | ||
EOF | ||
|
||
cat > genome.fasta <<'EOF' | ||
>chr1 | ||
TGGCATGAGCCAACGAACGCTGCCTCATAAGCCTCACACATCCGCGCCTATGTTGTGACTCTCTGTGAGCGTTCGTGGG | ||
GCTCGTCACCACTATGGTTGGCCGGTTAGTAGTGTGACTCCTGGTTTTCTGGAGCTTCTTTAAACCGTAGTCCAGTCAA | ||
TGCGAATGGCACTTCACGACGGACTGTCCTTAGGTGTGAGGCTTATGAGGCACTCAGGGGA | ||
EOF | ||
|
||
cat > genes.gtf <<'EOF' | ||
chr1 example_source gene 0 50 . + . gene_id "gene1"; transcript_id "transcript1"; | ||
chr1 example_source exon 20 40 . + . gene_id "gene1"; transcript_id "transcript1"; | ||
chr1 example_source gene 100 219 . + . gene_id "gene2"; transcript_id "transcript2"; | ||
chr1 example_source exon 191 210 . + . gene_id "gene2"; transcript_id "transcript2"; | ||
EOF | ||
|
||
|
||
echo "> Generate index" | ||
|
||
rsem-prepare-reference \ | ||
--gtf "genes.gtf" \ | ||
"genome.fasta" \ | ||
"index"\ | ||
emmarousseau marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
mkdir index | ||
mv index.* index/ | ||
|
||
STAR \ | ||
${meta_cpus:+--runThreadN $meta_cpus} \ | ||
--runMode genomeGenerate \ | ||
--genomeDir "index/" \ | ||
--genomeFastaFiles "genome.fasta" \ | ||
--sjdbGTFfile "genes.gtf" \ | ||
--genomeSAindexNbases 2 | ||
|
||
######################################################################################### | ||
|
||
echo ">>> Test 1: Paired-end reads using STAR to align reads" | ||
"$meta_executable" \ | ||
--star \ | ||
--paired \ | ||
--input "reads_R1.fastq;reads_R2.fastq" \ | ||
--index index \ | ||
--id test \ | ||
--seed 1 \ | ||
--quiet | ||
|
||
echo ">>> Checking whether output exists" | ||
[ ! -f "test.genes.results" ] && echo "Gene level expression counts file does not exist!" && exit 1 | ||
[ ! -s "test.genes.results" ] && echo "Gene level expression counts file is empty!" && exit 1 | ||
[ ! -f "test.isoforms.results" ] && echo "Transcript level expression counts file does not exist!" && exit 1 | ||
[ ! -s "test.isoforms.results" ] && echo "Transcript level expression counts file is empty!" && exit 1 | ||
[ ! -d "test.stat" ] && echo "Stats file does not exist!" && exit 1 | ||
|
||
echo ">>> Check wheter output is correct" | ||
diff $test_dir/output/ref.genes.results test.genes.results || { echo "Gene level expression counts file is incorrect!"; exit 1; } | ||
diff $test_dir/output/ref.isoforms.results test.isoforms.results || { echo "Transcript level expression counts file is incorrect!"; exit 1; } | ||
diff $test_dir/output/ref.cnt test.stat/test.cnt || { echo "Stats file is incorrect!"; exit 1; } | ||
|
||
##################################################################################################### | ||
|
||
echo "All tests succeeded!" | ||
exit 0 | ||
emmarousseau marked this conversation as resolved.
Show resolved
Hide resolved
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the fastq files are inside There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep makes sense haha |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
1 0 0 1 | ||
0 0 0 | ||
0 3 | ||
0 1 | ||
Inf 0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
gene_id transcript_id(s) length effective_length expected_count TPM FPKM | ||
gene1 transcript1 21.00 21.00 0.00 0.00 0.00 | ||
gene2 transcript2 20.00 20.00 0.00 0.00 0.00 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
transcript_id gene_id length effective_length expected_count TPM FPKM IsoPct | ||
transcript1 gene1 21 21.00 0.00 0.00 0.00 0.00 | ||
transcript2 gene2 20 20.00 0.00 0.00 0.00 0.00 |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This script is no longer needed, right? |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#!/bin/bash | ||
|
||
wget wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/reference/rsem.tar.gz | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yo dawg I heard you like wget so I put some wget in your wget
emmarousseau marked this conversation as resolved.
Show resolved
Hide resolved
|
||
wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357070_1.fastq.gz | ||
wget https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq3/testdata/GSE110004/SRR6357070_2.fastq.gz | ||
|
||
# decompress file without keeping the original | ||
gunzip SRR6357070_1.fastq.gz | ||
gunzip SRR6357070_2.fastq.gz | ||
|
||
# only keep 100 reads per file | ||
head -n 400 SRR6357070_1.fastq > SRR6357070_1.fastq.tmp | ||
head -n 400 SRR6357070_2.fastq > SRR6357070_2.fastq.tmp | ||
|
||
mv SRR6357070_1.fastq.tmp SRR6357070_1.fastq | ||
mv SRR6357070_2.fastq.tmp SRR6357070_2.fastq | ||
|
||
rm SRR6357070_1.fastq.gz | ||
rm SRR6357070_2.fastq.gz | ||
|
||
gzip SRR6357070_1.fastq | ||
gzip SRR6357070_2.fastq | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems to be empty