Skip to content

Commit

Permalink
Remove script.sh middleman, add test
Browse files Browse the repository at this point in the history
  • Loading branch information
emmarousseau committed Sep 26, 2024
1 parent b51e6fb commit 0aa4613
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 79 deletions.
15 changes: 7 additions & 8 deletions src/dupradar/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ argument_groups:
required: true
description: Path to GTF annotation file.
- name: --paired
type: boolean
type: boolean_true
description: Add flag if input alignment file consists of paired reads
- name: --strandedness
type: string
choices: [forward, reverse, unstranded]
type: integer
choices: [0, 1, 2]
description: |
Strandedness of input bam file reads (forward, reverse or unstranded (default, applicable to paired reads))
Strandedness of input bam file reads (1 = forward, 2 = reverse or 0 = unstranded (default,
applicable to paired reads))
example: 0

- name: Output
arguments:
Expand All @@ -47,7 +49,6 @@ argument_groups:
- name: --output_duprate_exp_boxplot
type: file
direction: output
default: duprate_exp_boxplot.pdf
description: |
Path to output file (pdf) of distribution of expression box plot
- name: --output_duprate_exp_densplot
Expand Down Expand Up @@ -75,9 +76,7 @@ argument_groups:
example: intercept_slope.txt

resources:
- type: bash_script
path: script.sh
# Copied from https://github.com/nf-core/rnaseq/blob/3.12.0/bin/dupradar.r
# Adapted from https://github.com/nf-core/rnaseq/blob/3.12.0/bin/dupradar.r
- type: r_script
path: dupradar.r
test_resources:
Expand Down
78 changes: 49 additions & 29 deletions src/dupradar/dupradar.r
Original file line number Diff line number Diff line change
@@ -1,24 +1,48 @@
#!/usr/bin/env Rscript

# Command line argument processing
args = commandArgs(trailingOnly=TRUE)
if (length(args) < 5) {
## VIASH START
par <- list(
"input" = 'test_data/sample.bam',
"id" = 'test',
"gtf_annotation" = 'test_data/genes.gtf',
"strandedness" = 1,
"paired" = TRUE,
"threads" = 1,
"output_dupmatrix"="dup_matrix.txt",
"output_dup_intercept_mqc"="dup_intercept_mqc.txt",
"output_duprate_exp_boxplot"="duprate_exp_boxplot.pdf",
"output_duprate_exp_densplot"="duprate_exp_densityplot.pdf",
"output_duprate_exp_denscurve_mqc"="duprate_exp_density_curve_mqc.pdf",
"output_expression_histogram"="expression_hist.pdf",
"output_intercept_slope"="intercept_slope.txt"
)
## VIASH END


if (length(par) < 5) {
stop("Usage: dupRadar.r <input.bam> <sample_id> <annotation.gtf> <strandDirection:0=unstranded/1=forward/2=reverse> <paired/single> <nbThreads> <R-package-location (optional)>", call.=FALSE)
}

message("paired_end is", args[5])
message("the type is is", class(args[5]))

input_bam <- args[1]
output_prefix <- args[2]
annotation_gtf <- args[3]
stranded <- as.numeric(args[4])
paired_end <- ifelse(args[5] == "true", TRUE, FALSE)
threads <- as.numeric(args[6])
input_bam <- par$input
output_prefix <- par$id
annotation_gtf <- par$gtf_annotation
stranded <- ifelse(is.null(par$strandedness), 0, as.integer(par$strandedness))
paired_end <- ifelse(par$paired, TRUE, FALSE)
threads <- ifelse(is.null(par$threads), 1, as.integer(par$threads))

output_dupmatrix <- ifelse(is.null(par$output_dupmatrix), paste0(output_prefix, "_dupMatrix.txt", sep=""), par$output_dupmatrix)
output_dup_intercept_mqc <- ifelse(is.null(par$output_dup_intercept_mqc), paste0(output_prefix, "_dup_intercept_mqc.txt", sep=""), par$output_dup_intercept_mqc)
output_duprate_exp_boxplot <- ifelse(is.null(par$output_duprate_exp_boxplot), paste0(output_prefix, "_duprateExpBoxplot.pdf", sep=""), par$output_duprate_exp_boxplot)
output_duprate_exp_densplot <- ifelse(is.null(par$output_duprate_exp_densplot), paste0(output_prefix, "_duprate_exp_densplot.pdf", sep=""), par$output_duprate_exp_densplot)
output_duprate_exp_denscurve_mqc <- ifelse(is.null(par$output_duprate_exp_denscurve_mqc), paste0(output_prefix, "_duprateExpDensCurve_mqc.txt", sep=""), par$output_duprate_exp_denscurve_mqc)
output_expression_histogram <- ifelse(is.null(par$output_expression_histogram), paste0(output_prefix, "_expressionHist.pdf", sep=""), par$output_expression_histogram)
output_intercept_slope <- ifelse(is.null(par$output_intercept_slope), paste0(output_prefix, "_intercept_slope.txt", sep=""), par$output_intercept_slope)

bamRegex <- "(.+)\\.bam$"

if(!(grepl(bamRegex, input_bam) && file.exists(input_bam) && (!file.info(input_bam)$isdir))) stop("First argument '<input.bam>' must be an existing file (not a directory) with '.bam' extension...")

if(!(grepl(bamRegex, input_bam) && file.exists(input_bam) && (!file.info(input_bam)$isdir))) stop("First argument '<input.bam>' must be an existing file (not a directory) with '.bam' extension...")
if(!(file.exists(annotation_gtf) && (!file.info(annotation_gtf)$isdir))) stop("Third argument '<annotation.gtf>' must be an existing file (and not a directory)...")
if(is.na(stranded) || (!(stranded %in% (0:2)))) stop("Fourth argument <strandDirection> must be a numeric value in 0(unstranded)/1(forward)/2(reverse)...")
if(is.na(threads) || (threads<=0)) stop("Fifth argument <nbThreads> must be a strictly positive numeric value...")
Expand All @@ -27,7 +51,7 @@ if(is.na(threads) || (threads<=0)) stop("Fifth argument <nbThreads> must be a st
message("Input bam (Arg 1): ", input_bam)
message("Output basename(Arg 2): ", output_prefix)
message("Input gtf (Arg 3): ", annotation_gtf)
message("Strandness (Arg 4): ", c("unstranded", "forward", "reverse")[stranded+1])
message("Strandness (Arg 4): ", c("unstranded", "forward", "reverse")[stranded])
message("paired_end (Arg 5): ", paired_end)
message("Nb threads (Arg 6): ", threads)
message("R package loc. (Arg 7): ", ifelse(length(args) > 4, args[5], "Not specified"))
Expand All @@ -43,25 +67,18 @@ if (!require("dupRadar")) {
library("dupRadar")
}
if (!require("parallel")) {
install.packages("parallel", dependencies=TRUE, repos='http://cloud.r-project.org/')
library("parallel")
}


# Duplicate stats
dm <- analyzeDuprates(input_bam, annotation_gtf, stranded, paired_end, threads)
print(input_bam)
print(output_prefix)
print(annotation_gtf)
print(stranded)
print(paired_end)
print(threads)
print("analyzeDuprates done")
write.table(dm, file=paste(output_prefix, "_dupMatrix.txt", sep=""), quote=F, row.name=F, sep="\t")
write.table(dm, file=output_dupmatrix, quote=F, row.name=F, sep="\t")
print("write.table done")

# 2D density scatter plot
pdf(paste0(output_prefix, "_duprateExpDens.pdf"))
pdf(output_duprate_exp_densplot)
print("pdf done")
duprateExpDensPlot(DupMat=dm)
title("Density scatter plot")
Expand All @@ -74,7 +91,7 @@ cat(
paste("- dupRadar Int (duprate at low read counts):", fit$intercept),
paste("- dupRadar Sl (progression of the duplication rate):", fit$slope),
fill=TRUE, labels=output_prefix,
file=paste0(output_prefix, "_intercept_slope.txt"), append=FALSE
file=output_intercept_slope, append=FALSE
)

# Create a multiqc file dupInt
Expand All @@ -92,8 +109,8 @@ line="#id: DupInt
# format: '{:.2f}%'
Sample dupRadar_intercept"

write(line,file=paste0(output_prefix, "_dup_intercept_mqc.txt"),append=TRUE)
write(paste(sample_name, fit$intercept),file=paste0(output_prefix, "_dup_intercept_mqc.txt"),append=TRUE)
write(line,file=output_dup_intercept_mqc, append=TRUE)
write(paste(sample_name, fit$intercept),file=output_dup_intercept_mqc, append=TRUE)
print("write dup_intercept_mqc done")

# Get numbers from dupRadar GLM
Expand Down Expand Up @@ -142,28 +159,31 @@ line="#id: dupradar
# value: 1000
# width: 1"

write(line,file=paste0(output_prefix, "_duprateExpDensCurve_mqc.txt"),append=TRUE)
write(line,file=output_duprate_exp_denscurve_mqc, append=TRUE)
write.table(
cbind(curve_x, curve_y),
file=paste0(output_prefix, "_duprateExpDensCurve_mqc.txt"),
file=output_duprate_exp_denscurve_mqc,
quote=FALSE, row.names=FALSE, col.names=FALSE, append=TRUE,
)
print("write duprateExpDensCurve_mqc done")
# Distribution of expression box plot
pdf(paste0(output_prefix, "_duprateExpBoxplot.pdf"))
pdf(output_duprate_exp_boxplot)
duprateExpBoxplot(DupMat=dm)
title("Percent Duplication by Expression")
mtext(output_prefix, side=3)
dev.off()
print("duprateExpBoxplot done")
# Distribution of RPK values per gene
pdf(paste0(output_prefix, "_expressionHist.pdf"))
pdf(output_expression_histogram)
expressionHist(DupMat=dm)
title("Distribution of RPK values per gene")
mtext(output_prefix, side=3)
dev.off()
print("expressionHist done")


#

# Print sessioninfo to standard out
print(output_prefix)
# citation("dupRadar")
Expand Down
39 changes: 0 additions & 39 deletions src/dupradar/script.sh

This file was deleted.

45 changes: 42 additions & 3 deletions src/dupradar/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ output_dupmatrix="dup_matrix.txt"
output_dup_intercept_mqc="dup_intercept_mqc.txt"
output_duprate_exp_boxplot="duprate_exp_boxplot.pdf"
output_duprate_exp_densplot="duprate_exp_densityplot.pdf"
output_duprate_exp_denscurve_mqc="duprate_exp_density_curve_mqc.pdf"
output_duprate_exp_denscurve_mqc="duprate_exp_density_curve_mqc.txt"
output_expression_histogram="expression_hist.pdf"
output_intercept_slope="intercept_slope.txt"

Expand All @@ -19,8 +19,7 @@ echo "> Running $meta_functionality_name for unpaired reads, writing to tmpdir $
--input "$input_bam" \
--id "test" \
--gtf_annotation "$input_gtf" \
--strandedness "forward" \
--paired false \
--strandedness 1 \
--output_dupmatrix $output_dupmatrix \
--output_dup_intercept_mqc $output_dup_intercept_mqc \
--output_duprate_exp_boxplot $output_duprate_exp_boxplot \
Expand Down Expand Up @@ -65,4 +64,44 @@ diff -B -b "$output_duprate_exp_denscurve_mqc" "${meta_resources_dir}/test_data/
{ echo "Output file $output_duprate_exp_denscurve_mqc is not correct"; exit 1; }
diff -B -b "$output_intercept_slope" "${meta_resources_dir}/test_data/test_intercept_slope.txt" || \
{ echo "Output file $output_intercept_slope is not correct"; exit 1; }


echo ">>> Test 2: Example without specified output files"

"$meta_executable" \
--input "$input_bam" \
--id "test" \
--gtf_annotation "$input_gtf" \
--strandedness 1

exit_code=$?
[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1

echo ">> asserting output has been created for paired read input"
# output files with original names from DupRadar and id prefix
[ ! -f "test_dupMatrix.txt" ] && echo "test_dupMatrix.txt was not created" && exit 1
[ ! -s "test_dupMatrix.txt" ] && echo "test_dupMatrix.txt is empty" && exit 1
[ ! -f "test_dup_intercept_mqc.txt" ] && echo "test_dup_intercept_mqc.txt was not created" && exit 1
[ ! -s "test_dup_intercept_mqc.txt" ] && echo "test_dup_intercept_mqc.txt is empty" && exit 1
[ ! -f "test_duprateExpBoxplot.pdf" ] && echo "test_duprateExpBoxplot.pdf was not created" && exit 1
[ ! -s "test_duprateExpBoxplot.pdf" ] && echo "test_duprateExpBoxplot.pdf is empty" && exit 1
[ ! -f "test_duprate_exp_densplot.pdf" ] && echo "test_duprate_exp_densplot.pdf was not created" && exit 1
[ ! -s "test_duprate_exp_densplot.pdf" ] && echo "test_duprate_exp_densplot.pdf is empty" && exit 1
[ ! -f "test_duprateExpDensCurve_mqc.txt" ] && echo "test_duprateExpDensCurve_mqc.txt was not created" && exit 1
[ ! -s "test_duprateExpDensCurve_mqc.txt" ] && echo "test_duprateExpDensCurve_mqc.txt is empty" && exit 1
[ ! -f "test_expressionHist.pdf" ] && echo "test_expressionHist.pdf was not created" && exit 1
[ ! -s "test_expressionHist.pdf" ] && echo "test_expressionHist.pdf is empty" && exit 1
[ ! -f "test_intercept_slope.txt" ] && echo "test_intercept_slope.txt was not created" && exit 1
[ ! -s "test_intercept_slope.txt" ] && echo "test_intercept_slope.txt is empty" && exit 1


echo ">> Check if output is correct"
diff -B -b "test_dupMatrix.txt" "${meta_resources_dir}/test_data/test_dupMatrix.txt" || \
{ echo "Output file test_dupMatrix.txt is not correct"; exit 1; }
diff -B -b "test_duprateExpDensCurve_mqc.txt" "${meta_resources_dir}/test_data/test_duprateExpDensCurve_mqc.txt" || \
{ echo "Output file test_duprateExpDensCurve_mqc.txt is not correct"; exit 1; }
diff -B -b "test_intercept_slope.txt" "${meta_resources_dir}/test_data/test_intercept_slope.txt" || \
{ echo "Output file test_intercept_slope.txt is not correct"; exit 1; }

echo ">>> All tests passed"
exit 0

0 comments on commit 0aa4613

Please sign in to comment.