From 0aa46130fcb04c9baf2748001335702ded651f99 Mon Sep 17 00:00:00 2001 From: emmarousseau Date: Thu, 26 Sep 2024 14:24:14 +0200 Subject: [PATCH] Remove script.sh middleman, add test --- src/dupradar/config.vsh.yaml | 15 ++++--- src/dupradar/dupradar.r | 78 ++++++++++++++++++++++-------------- src/dupradar/script.sh | 39 ------------------ src/dupradar/test.sh | 45 +++++++++++++++++++-- 4 files changed, 98 insertions(+), 79 deletions(-) delete mode 100755 src/dupradar/script.sh diff --git a/src/dupradar/config.vsh.yaml b/src/dupradar/config.vsh.yaml index d25547db..ce4f79b1 100644 --- a/src/dupradar/config.vsh.yaml +++ b/src/dupradar/config.vsh.yaml @@ -24,13 +24,15 @@ argument_groups: required: true description: Path to GTF annotation file. - name: --paired - type: boolean + type: boolean_true description: Add flag if input alignment file consists of paired reads - name: --strandedness - type: string - choices: [forward, reverse, unstranded] + type: integer + choices: [0, 1, 2] description: | - Strandedness of input bam file reads (forward, reverse or unstranded (default, applicable to paired reads)) + Strandedness of input bam file reads (1 = forward, 2 = reverse or 0 = unstranded (default, + applicable to paired reads)) + example: 0 - name: Output arguments: @@ -47,7 +49,6 @@ argument_groups: - name: --output_duprate_exp_boxplot type: file direction: output - default: duprate_exp_boxplot.pdf description: | Path to output file (pdf) of distribution of expression box plot - name: --output_duprate_exp_densplot @@ -75,9 +76,7 @@ argument_groups: example: intercept_slope.txt resources: - - type: bash_script - path: script.sh - # Copied from https://github.com/nf-core/rnaseq/blob/3.12.0/bin/dupradar.r + # Adapted from https://github.com/nf-core/rnaseq/blob/3.12.0/bin/dupradar.r - type: r_script path: dupradar.r test_resources: diff --git a/src/dupradar/dupradar.r b/src/dupradar/dupradar.r index 4294a42e..a3497e7a 100755 --- a/src/dupradar/dupradar.r +++ b/src/dupradar/dupradar.r @@ -1,24 +1,48 @@ #!/usr/bin/env Rscript -# Command line argument processing -args = commandArgs(trailingOnly=TRUE) -if (length(args) < 5) { +## VIASH START +par <- list( + "input" = 'test_data/sample.bam', + "id" = 'test', + "gtf_annotation" = 'test_data/genes.gtf', + "strandedness" = 1, + "paired" = TRUE, + "threads" = 1, + "output_dupmatrix"="dup_matrix.txt", + "output_dup_intercept_mqc"="dup_intercept_mqc.txt", + "output_duprate_exp_boxplot"="duprate_exp_boxplot.pdf", + "output_duprate_exp_densplot"="duprate_exp_densityplot.pdf", + "output_duprate_exp_denscurve_mqc"="duprate_exp_density_curve_mqc.pdf", + "output_expression_histogram"="expression_hist.pdf", + "output_intercept_slope"="intercept_slope.txt" +) +## VIASH END + + +if (length(par) < 5) { stop("Usage: dupRadar.r ", call.=FALSE) } -message("paired_end is", args[5]) -message("the type is is", class(args[5])) -input_bam <- args[1] -output_prefix <- args[2] -annotation_gtf <- args[3] -stranded <- as.numeric(args[4]) -paired_end <- ifelse(args[5] == "true", TRUE, FALSE) -threads <- as.numeric(args[6]) +input_bam <- par$input +output_prefix <- par$id +annotation_gtf <- par$gtf_annotation +stranded <- ifelse(is.null(par$strandedness), 0, as.integer(par$strandedness)) +paired_end <- ifelse(par$paired, TRUE, FALSE) +threads <- ifelse(is.null(par$threads), 1, as.integer(par$threads)) + +output_dupmatrix <- ifelse(is.null(par$output_dupmatrix), paste0(output_prefix, "_dupMatrix.txt", sep=""), par$output_dupmatrix) +output_dup_intercept_mqc <- ifelse(is.null(par$output_dup_intercept_mqc), paste0(output_prefix, "_dup_intercept_mqc.txt", sep=""), par$output_dup_intercept_mqc) +output_duprate_exp_boxplot <- ifelse(is.null(par$output_duprate_exp_boxplot), paste0(output_prefix, "_duprateExpBoxplot.pdf", sep=""), par$output_duprate_exp_boxplot) +output_duprate_exp_densplot <- ifelse(is.null(par$output_duprate_exp_densplot), paste0(output_prefix, "_duprate_exp_densplot.pdf", sep=""), par$output_duprate_exp_densplot) +output_duprate_exp_denscurve_mqc <- ifelse(is.null(par$output_duprate_exp_denscurve_mqc), paste0(output_prefix, "_duprateExpDensCurve_mqc.txt", sep=""), par$output_duprate_exp_denscurve_mqc) +output_expression_histogram <- ifelse(is.null(par$output_expression_histogram), paste0(output_prefix, "_expressionHist.pdf", sep=""), par$output_expression_histogram) +output_intercept_slope <- ifelse(is.null(par$output_intercept_slope), paste0(output_prefix, "_intercept_slope.txt", sep=""), par$output_intercept_slope) bamRegex <- "(.+)\\.bam$" -if(!(grepl(bamRegex, input_bam) && file.exists(input_bam) && (!file.info(input_bam)$isdir))) stop("First argument '' must be an existing file (not a directory) with '.bam' extension...") + +if(!(grepl(bamRegex, input_bam) && file.exists(input_bam) && (!file.info(input_bam)$isdir))) stop("First argument '' must be an existing file (not a directory) with '.bam' extension...") if(!(file.exists(annotation_gtf) && (!file.info(annotation_gtf)$isdir))) stop("Third argument '' must be an existing file (and not a directory)...") if(is.na(stranded) || (!(stranded %in% (0:2)))) stop("Fourth argument must be a numeric value in 0(unstranded)/1(forward)/2(reverse)...") if(is.na(threads) || (threads<=0)) stop("Fifth argument must be a strictly positive numeric value...") @@ -27,7 +51,7 @@ if(is.na(threads) || (threads<=0)) stop("Fifth argument must be a st message("Input bam (Arg 1): ", input_bam) message("Output basename(Arg 2): ", output_prefix) message("Input gtf (Arg 3): ", annotation_gtf) -message("Strandness (Arg 4): ", c("unstranded", "forward", "reverse")[stranded+1]) +message("Strandness (Arg 4): ", c("unstranded", "forward", "reverse")[stranded]) message("paired_end (Arg 5): ", paired_end) message("Nb threads (Arg 6): ", threads) message("R package loc. (Arg 7): ", ifelse(length(args) > 4, args[5], "Not specified")) @@ -43,25 +67,18 @@ if (!require("dupRadar")) { library("dupRadar") } if (!require("parallel")) { - install.packages("parallel", dependencies=TRUE, repos='http://cloud.r-project.org/') library("parallel") } # Duplicate stats dm <- analyzeDuprates(input_bam, annotation_gtf, stranded, paired_end, threads) -print(input_bam) -print(output_prefix) -print(annotation_gtf) -print(stranded) -print(paired_end) -print(threads) print("analyzeDuprates done") -write.table(dm, file=paste(output_prefix, "_dupMatrix.txt", sep=""), quote=F, row.name=F, sep="\t") +write.table(dm, file=output_dupmatrix, quote=F, row.name=F, sep="\t") print("write.table done") # 2D density scatter plot -pdf(paste0(output_prefix, "_duprateExpDens.pdf")) +pdf(output_duprate_exp_densplot) print("pdf done") duprateExpDensPlot(DupMat=dm) title("Density scatter plot") @@ -74,7 +91,7 @@ cat( paste("- dupRadar Int (duprate at low read counts):", fit$intercept), paste("- dupRadar Sl (progression of the duplication rate):", fit$slope), fill=TRUE, labels=output_prefix, - file=paste0(output_prefix, "_intercept_slope.txt"), append=FALSE + file=output_intercept_slope, append=FALSE ) # Create a multiqc file dupInt @@ -92,8 +109,8 @@ line="#id: DupInt # format: '{:.2f}%' Sample dupRadar_intercept" -write(line,file=paste0(output_prefix, "_dup_intercept_mqc.txt"),append=TRUE) -write(paste(sample_name, fit$intercept),file=paste0(output_prefix, "_dup_intercept_mqc.txt"),append=TRUE) +write(line,file=output_dup_intercept_mqc, append=TRUE) +write(paste(sample_name, fit$intercept),file=output_dup_intercept_mqc, append=TRUE) print("write dup_intercept_mqc done") # Get numbers from dupRadar GLM @@ -142,28 +159,31 @@ line="#id: dupradar # value: 1000 # width: 1" -write(line,file=paste0(output_prefix, "_duprateExpDensCurve_mqc.txt"),append=TRUE) +write(line,file=output_duprate_exp_denscurve_mqc, append=TRUE) write.table( cbind(curve_x, curve_y), - file=paste0(output_prefix, "_duprateExpDensCurve_mqc.txt"), + file=output_duprate_exp_denscurve_mqc, quote=FALSE, row.names=FALSE, col.names=FALSE, append=TRUE, ) print("write duprateExpDensCurve_mqc done") # Distribution of expression box plot -pdf(paste0(output_prefix, "_duprateExpBoxplot.pdf")) +pdf(output_duprate_exp_boxplot) duprateExpBoxplot(DupMat=dm) title("Percent Duplication by Expression") mtext(output_prefix, side=3) dev.off() print("duprateExpBoxplot done") # Distribution of RPK values per gene -pdf(paste0(output_prefix, "_expressionHist.pdf")) +pdf(output_expression_histogram) expressionHist(DupMat=dm) title("Distribution of RPK values per gene") mtext(output_prefix, side=3) dev.off() print("expressionHist done") + +# + # Print sessioninfo to standard out print(output_prefix) # citation("dupRadar") diff --git a/src/dupradar/script.sh b/src/dupradar/script.sh deleted file mode 100755 index 6ac70bf0..00000000 --- a/src/dupradar/script.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -## VIASH START -## VIASH END - - -set -eo pipefail - - -function num_strandness { - if [ $par_strandedness == 'unstranded' ]; then echo 0 - elif [ $par_strandedness == 'forward' ]; then echo 1 - elif [ $par_strandedness == 'reverse' ]; then echo 2 - else echo "strandedness must be unstranded, forward or reverse." && \ - exit 1 - fi -} - - -if [ $par_paired ]; then paired=TRUE; else paired=FALSE; fi - - -Rscript "$meta_resources_dir/dupradar.r" \ - $par_input \ - $par_id \ - $par_gtf_annotation \ - $(num_strandness) \ - ${paired} \ - ${meta_cpus:-1} - -mv "$par_id"_dupMatrix.txt $par_output_dupmatrix -mv "$par_id"_dup_intercept_mqc.txt $par_output_dup_intercept_mqc -mv "$par_id"_duprateExpBoxplot.pdf $par_output_duprate_exp_boxplot -mv "$par_id"_duprateExpDens.pdf $par_output_duprate_exp_densplot -mv "$par_id"_duprateExpDensCurve_mqc.txt $par_output_duprate_exp_denscurve_mqc -mv "$par_id"_expressionHist.pdf $par_output_expression_histogram -mv "$par_id"_intercept_slope.txt $par_output_intercept_slope - - -exit 0 diff --git a/src/dupradar/test.sh b/src/dupradar/test.sh index c4afd67a..99610357 100755 --- a/src/dupradar/test.sh +++ b/src/dupradar/test.sh @@ -8,7 +8,7 @@ output_dupmatrix="dup_matrix.txt" output_dup_intercept_mqc="dup_intercept_mqc.txt" output_duprate_exp_boxplot="duprate_exp_boxplot.pdf" output_duprate_exp_densplot="duprate_exp_densityplot.pdf" -output_duprate_exp_denscurve_mqc="duprate_exp_density_curve_mqc.pdf" +output_duprate_exp_denscurve_mqc="duprate_exp_density_curve_mqc.txt" output_expression_histogram="expression_hist.pdf" output_intercept_slope="intercept_slope.txt" @@ -19,8 +19,7 @@ echo "> Running $meta_functionality_name for unpaired reads, writing to tmpdir $ --input "$input_bam" \ --id "test" \ --gtf_annotation "$input_gtf" \ - --strandedness "forward" \ - --paired false \ + --strandedness 1 \ --output_dupmatrix $output_dupmatrix \ --output_dup_intercept_mqc $output_dup_intercept_mqc \ --output_duprate_exp_boxplot $output_duprate_exp_boxplot \ @@ -65,4 +64,44 @@ diff -B -b "$output_duprate_exp_denscurve_mqc" "${meta_resources_dir}/test_data/ { echo "Output file $output_duprate_exp_denscurve_mqc is not correct"; exit 1; } diff -B -b "$output_intercept_slope" "${meta_resources_dir}/test_data/test_intercept_slope.txt" || \ { echo "Output file $output_intercept_slope is not correct"; exit 1; } + + +echo ">>> Test 2: Example without specified output files" + +"$meta_executable" \ + --input "$input_bam" \ + --id "test" \ + --gtf_annotation "$input_gtf" \ + --strandedness 1 + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> asserting output has been created for paired read input" +# output files with original names from DupRadar and id prefix +[ ! -f "test_dupMatrix.txt" ] && echo "test_dupMatrix.txt was not created" && exit 1 +[ ! -s "test_dupMatrix.txt" ] && echo "test_dupMatrix.txt is empty" && exit 1 +[ ! -f "test_dup_intercept_mqc.txt" ] && echo "test_dup_intercept_mqc.txt was not created" && exit 1 +[ ! -s "test_dup_intercept_mqc.txt" ] && echo "test_dup_intercept_mqc.txt is empty" && exit 1 +[ ! -f "test_duprateExpBoxplot.pdf" ] && echo "test_duprateExpBoxplot.pdf was not created" && exit 1 +[ ! -s "test_duprateExpBoxplot.pdf" ] && echo "test_duprateExpBoxplot.pdf is empty" && exit 1 +[ ! -f "test_duprate_exp_densplot.pdf" ] && echo "test_duprate_exp_densplot.pdf was not created" && exit 1 +[ ! -s "test_duprate_exp_densplot.pdf" ] && echo "test_duprate_exp_densplot.pdf is empty" && exit 1 +[ ! -f "test_duprateExpDensCurve_mqc.txt" ] && echo "test_duprateExpDensCurve_mqc.txt was not created" && exit 1 +[ ! -s "test_duprateExpDensCurve_mqc.txt" ] && echo "test_duprateExpDensCurve_mqc.txt is empty" && exit 1 +[ ! -f "test_expressionHist.pdf" ] && echo "test_expressionHist.pdf was not created" && exit 1 +[ ! -s "test_expressionHist.pdf" ] && echo "test_expressionHist.pdf is empty" && exit 1 +[ ! -f "test_intercept_slope.txt" ] && echo "test_intercept_slope.txt was not created" && exit 1 +[ ! -s "test_intercept_slope.txt" ] && echo "test_intercept_slope.txt is empty" && exit 1 + + +echo ">> Check if output is correct" +diff -B -b "test_dupMatrix.txt" "${meta_resources_dir}/test_data/test_dupMatrix.txt" || \ + { echo "Output file test_dupMatrix.txt is not correct"; exit 1; } +diff -B -b "test_duprateExpDensCurve_mqc.txt" "${meta_resources_dir}/test_data/test_duprateExpDensCurve_mqc.txt" || \ + { echo "Output file test_duprateExpDensCurve_mqc.txt is not correct"; exit 1; } +diff -B -b "test_intercept_slope.txt" "${meta_resources_dir}/test_data/test_intercept_slope.txt" || \ + { echo "Output file test_intercept_slope.txt is not correct"; exit 1; } + +echo ">>> All tests passed" exit 0 \ No newline at end of file