From aeb8419f4e17ca9c07d6fb02cb78181ffd68492f Mon Sep 17 00:00:00 2001
From: emmarousseau
Date: Wed, 10 Jul 2024 23:39:30 +0200
Subject: [PATCH] expected output (not correct) and update to R script package installation

Add expected-output fixtures for the dupRadar component test and compare
the generated text outputs against them.

- config.vsh.yaml: drop the "$id." prefix from the output examples and
  default, and ship the test_data directory as a test resource.
- script.R: install dupRadar through BiocManager instead of sourcing
  biocLite.R. The CRAN mirror is passed explicitly so the BiocManager
  install does not depend on a preconfigured repository.
- script.sh: forward the CPU count (defaulting to 1) to script.R.
- test.sh: read the inputs from test_data/ and diff the text outputs
  against the fixtures. Each produced file is compared with its own
  fixture, and the failure handlers use "{ ...; exit 1; }" so that the
  exit ends the test itself instead of only a subshell.

NOTE(review): test_dup_intercept_mqc.txt repeats the same 13-line block
three times. Confirm that this matches the output of a single run of the
component; otherwise regenerate the fixture.

---
 src/dupradar/config.vsh.yaml              | 18 ++++-----
 src/dupradar/script.R                     | 10 +++--
 src/dupradar/script.sh                    |  3 +-
 src/dupradar/test.sh                      | 24 +++++++++++-
 src/dupradar/test_data/test_dupMatrix.txt |  7 ++++
 .../test_data/test_dup_intercept_mqc.txt  | 39 +++++++++++++++++++
 .../test_duprateExpDensCurve_mqc.txt      | 33 ++++++++++++++++
 .../test_data/test_intercept_slope.txt    |  2 +
 8 files changed, 120 insertions(+), 16 deletions(-)
 create mode 100644 src/dupradar/test_data/test_dupMatrix.txt
 create mode 100644 src/dupradar/test_data/test_dup_intercept_mqc.txt
 create mode 100644 src/dupradar/test_data/test_duprateExpDensCurve_mqc.txt
 create mode 100644 src/dupradar/test_data/test_intercept_slope.txt

diff --git a/src/dupradar/config.vsh.yaml b/src/dupradar/config.vsh.yaml
index cb220f47..9727fdec 100644
--- a/src/dupradar/config.vsh.yaml
+++ b/src/dupradar/config.vsh.yaml
@@ -38,18 +38,16 @@ argument_groups:
         type: file
         direction: output
         description: path to output file (txt) of duplicate tag counts
-        example: $id.dup_matrix.txt
+        example: dup_matrix.txt
       - name: --output_dup_intercept_mqc
         type: file
         direction: output
         description: path to output file (txt) of multiqc intercept value DupRadar
-        example: $id.dup_intercept_mqc.txt
+        example: dup_intercept_mqc.txt
       - name: --output_duprate_exp_boxplot
         type: file
         direction: output
-        required: false
-        must_exist: true
-        default: $id.duprate_exp_boxplot.pdf
+        default: duprate_exp_boxplot.pdf
         description: |
           Path to output file (pdf) of distribution of expression box plot
       - name: --output_duprate_exp_densplot
@@ -57,24 +55,24 @@ argument_groups:
         direction: output
         description: |
           Path to output file (pdf) of 2D density scatter plot of duplicate tag counts
-        example: $id.duprate_exp_densityplot.pdf
+        example: duprate_exp_densityplot.pdf
       - name: --output_duprate_exp_denscurve_mqc
         type: file
         direction: output
         description: |
           Path to output file (pdf) of density curve of gene duplication multiqc
-        example: $id.duprate_exp_density_curve_mqc.txt
+        example: duprate_exp_density_curve_mqc.txt
       - name: --output_expression_histogram
         type: file
         direction: output
         description: |
           Path to output file (pdf) of distribution of RPK values per gene histogram
-        example: $id.expression_hist.pdf
+        example: expression_hist.pdf
       - name: --output_intercept_slope
         type: file
         direction: output
         description: output file (txt) with progression of duplication rate values
-        example: $id.intercept_slope.txt
+        example: intercept_slope.txt
 
 resources:
   - type: bash_script
@@ -85,6 +83,8 @@ resources:
 test_resources:
   - type: bash_script
     path: test.sh
+  - type: file
+    path: test_data
 engines:
   - type: docker
     image: quay.io/biocontainers/bioconductor-dupradar:1.32.0--r43hdfd78af_0
diff --git a/src/dupradar/script.R b/src/dupradar/script.R
index 82218c6e..24ebce2d 100755
--- a/src/dupradar/script.R
+++ b/src/dupradar/script.R
@@ -35,10 +35,12 @@ message("R package loc. (Arg 7): ", ifelse(length(args) > 4, args[5], "Not speci
 
 # Load / install packages
 if (length(args) > 5) { .libPaths( c( args[6], .libPaths() ) ) }
-if (!require("dupRadar")){
-    source("http://bioconductor.org/biocLite.R")
-    biocLite("dupRadar", suppressUpdates=TRUE)
-    library("dupRadar")
+if (!require("dupRadar")) {
+    if (!requireNamespace("BiocManager", quietly = TRUE)) {
+        install.packages("BiocManager", repos='https://cloud.r-project.org/')
+    }
+    BiocManager::install("dupRadar", update = TRUE, ask=FALSE)
+    library("dupRadar")
 }
 if (!require("parallel")) {
     install.packages("parallel", dependencies=TRUE, repos='http://cloud.r-project.org/')
diff --git a/src/dupradar/script.sh b/src/dupradar/script.sh
index a590401a..c84440a7 100755
--- a/src/dupradar/script.sh
+++ b/src/dupradar/script.sh
@@ -19,7 +19,8 @@ Rscript "$meta_resources_dir/script.R" \
     $par_id \
     $par_gtf_annotation \
     $(num_strandness) \
-    $par_paired
+    $par_paired \
+    ${meta_cpus:-1}
 
 mv "$par_id"_dupMatrix.txt $par_output_dupmatrix
 mv "$par_id"_dup_intercept_mqc.txt $par_output_dup_intercept_mqc
diff --git a/src/dupradar/test.sh b/src/dupradar/test.sh
index 01072e01..8a85af27 100755
--- a/src/dupradar/test.sh
+++ b/src/dupradar/test.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
 
 # define input and output for script
-input_bam="$meta_resources_dir/sample.bam"
-input_gtf="$meta_resources_dir/genes.gtf"
+input_bam="${meta_resources_dir}/test_data/sample.bam"
+input_gtf="${meta_resources_dir}/test_data/genes.gtf"
 
 output_dupmatrix="dup_matrix.txt"
 output_dup_intercept_mqc="dup_intercept_mqc.txt"
@@ -48,4 +48,24 @@ echo ">> asserting output has been created for paired read input"
 [ ! -f "$output_intercept_slope" ] && echo "$output_intercept_slope was not created" && exit 1
 [ ! -s "$output_intercept_slope" ] && echo "$output_intercept_slope is empty" && exit 1
 
+
+echo ">> Check if output is empty"
+[ ! -s "$output_dupmatrix" ] \
+    && echo "Output file $output_dupmatrix is empty" && exit 1
+[ ! -s "$output_dup_intercept_mqc" ] \
+    && echo "Output file $output_dup_intercept_mqc is empty" && exit 1
+[ ! -s "$output_intercept_slope" ] \
+    && echo "Output file $output_intercept_slope is empty" && exit 1
+
+echo ">> Check if output is correct"
+cat "$output_dupmatrix"
+cat "$output_dup_intercept_mqc"
+cat "$output_intercept_slope"
+# diff ignoring white spaces; { ...; exit 1; } exits the test itself, ( ... ) would only exit a subshell
+diff -B -b "$output_dupmatrix" "${meta_resources_dir}/test_data/test_dupMatrix.txt" || \
+    { echo "Output file $output_dupmatrix is not correct"; exit 1; }
+diff -B -b "$output_dup_intercept_mqc" "${meta_resources_dir}/test_data/test_dup_intercept_mqc.txt" || \
+    { echo "Output file $output_dup_intercept_mqc is not correct"; exit 1; }
+diff -B -b "$output_intercept_slope" "${meta_resources_dir}/test_data/test_intercept_slope.txt" || \
+    { echo "Output file $output_intercept_slope is not correct"; exit 1; }
 exit 0
\ No newline at end of file
diff --git a/src/dupradar/test_data/test_dupMatrix.txt b/src/dupradar/test_data/test_dupMatrix.txt
new file mode 100644
index 00000000..8ccda72e
--- /dev/null
+++ b/src/dupradar/test_data/test_dupMatrix.txt
@@ -0,0 +1,7 @@
+ID geneLength allCountsMulti filteredCountsMulti dupRateMulti dupsPerIdMulti RPKMulti PKMMulti allCounts filteredCounts dupRate dupsPerId RPK RPKM
+WASH7P 1769 41 41 0 0 23.1769361221029 188430.374976446 1 1 0 0 0.565291124929339 4595.86280430357
+FAM138A 2260 0 0 NA 0 0 0 0 0 NA 0 0 0
+FAM138F 2260 0 0 NA 0 0 0 0 0 NA 0 0 0
+OR4F5 918 0 0 NA 0 0 0 0 0 NA 0 0 0
+LOC729737 5474 18 18 0 0 3.28827183047132 26733.917320905 3 3 0 0 0.548045305078553 4455.65288681751
+LOC100132287 8740 39 39 0 0 4.46224256292906 36278.3948205615 1 1 0 0 0.11441647597254 930.215251809269
diff --git a/src/dupradar/test_data/test_dup_intercept_mqc.txt b/src/dupradar/test_data/test_dup_intercept_mqc.txt
new file mode 100644
index 00000000..0d0b4e1a
--- /dev/null
+++ b/src/dupradar/test_data/test_dup_intercept_mqc.txt
@@ -0,0 +1,39 @@
+#id: DupInt
+#plot_type: 'generalstats'
+#pconfig:
+#    dupRadar_intercept:
+#        title: 'dupInt'
+#        namespace: 'DupRadar'
+#        description: 'Intercept value from DupRadar'
+#        max: 100
+#        min: 0
+#        scale: 'RdYlGn-rev'
+#        format: '{:.2f}%'
+Sample dupRadar_intercept
+test 5.8262146393079e-11
+#id: DupInt
+#plot_type: 'generalstats'
+#pconfig:
+#    dupRadar_intercept:
+#        title: 'dupInt'
+#        namespace: 'DupRadar'
+#        description: 'Intercept value from DupRadar'
+#        max: 100
+#        min: 0
+#        scale: 'RdYlGn-rev'
+#        format: '{:.2f}%'
+Sample dupRadar_intercept
+test 5.8262146393079e-11
+#id: DupInt
+#plot_type: 'generalstats'
+#pconfig:
+#    dupRadar_intercept:
+#        title: 'dupInt'
+#        namespace: 'DupRadar'
+#        description: 'Intercept value from DupRadar'
+#        max: 100
+#        min: 0
+#        scale: 'RdYlGn-rev'
+#        format: '{:.2f}%'
+Sample dupRadar_intercept
+test 5.8262146393079e-11
diff --git a/src/dupradar/test_data/test_duprateExpDensCurve_mqc.txt b/src/dupradar/test_data/test_duprateExpDensCurve_mqc.txt
new file mode 100644
index 00000000..ca9f199e
--- /dev/null
+++ b/src/dupradar/test_data/test_duprateExpDensCurve_mqc.txt
@@ -0,0 +1,33 @@
+#id: dupradar
+#section_name: 'DupRadar'
+#section_href: 'bioconductor.org/packages/release/bioc/html/dupRadar.html'
+#description: "provides duplication rate quality control for RNA-Seq datasets. Highly expressed genes can be expected to have a lot of duplicate reads, but high numbers of duplicates at low read counts can indicate low library complexity with technical duplication.
+#    This plot shows the general linear models - a summary of the gene duplication distributions. "
+#pconfig:
+#    title: 'DupRadar General Linear Model'
+#    xLog: True
+#    xlab: 'expression (reads/kbp)'
+#    ylab: '% duplicate reads'
+#    ymax: 100
+#    ymin: 0
+#    tt_label: '{point.x:.1f} reads/kbp: {point.y:,.2f}% duplicates'
+#    xPlotLines:
+#        - color: 'green'
+#          dashStyle: 'LongDash'
+#          label:
+#                style: {color: 'green'}
+#                text: '0.5 RPKM'
+#                verticalAlign: 'bottom'
+#                y: -65
+#          value: 0.5
+#          width: 1
+#        - color: 'red'
+#          dashStyle: 'LongDash'
+#          label:
+#                style: {color: 'red'}
+#                text: '1 read/bp'
+#                verticalAlign: 'bottom'
+#                y: -65
+#          value: 1000
+#          width: 1
+0.11441647597254 5.82621463896864e-09
diff --git a/src/dupradar/test_data/test_intercept_slope.txt b/src/dupradar/test_data/test_intercept_slope.txt
new file mode 100644
index 00000000..5294b8ba
--- /dev/null
+++ b/src/dupradar/test_data/test_intercept_slope.txt
@@ -0,0 +1,2 @@
+test - dupRadar Int (duprate at low read counts): 5.8262146393079e-11
+test - dupRadar Sl (progression of the duplication rate): 0.999999999999966