diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0e32edb1..e36fa5e0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,8 @@
 
 * `rsem/rsem_calculate_expression`: Calculate expression levels (PR #93).
 
+* `dupradar`: Assessment of duplication rates in RNA-Seq datasets (PR #154).
+
 * `rseqc`:
   - `rseqc/rseqc_inner_distance`: Calculate inner distance between read pairs (PR #159).
   - `rseqc/rseqc_inferexperiment`: Infer strandedness from sequencing reads (PR #158).
@@ -29,6 +31,7 @@
 
 * `cutadapt`: Fix the the non-functional `action` parameter (PR #161).
 
+
 ## MINOR CHANGES
 
 * `agat_convert_bed2gff`: change type of argument `inflate_off` from `boolean_false` to `boolean_true` (PR #160).
diff --git a/src/dupradar/config.vsh.yaml b/src/dupradar/config.vsh.yaml
new file mode 100644
index 00000000..52063368
--- /dev/null
+++ b/src/dupradar/config.vsh.yaml
@@ -0,0 +1,103 @@
+name: dupradar
+description: Assessment of duplication rates in RNA-Seq datasets
+keywords: [ rnaseq, duplication, genomics ]
+links:
+  homepage: https://bioconductor.org/packages/release/bioc/html/dupRadar.html
+  documentation: https://bioconductor.org/packages/release/bioc/vignettes/dupRadar/inst/doc/dupRadar.html
+  repository: https://github.com/ssayols/dupRadar
+references: 
+  doi: "10.1186/s12859-016-1276-2"
+license: GPL-3.0
+
+argument_groups:
+- name: Input
+  arguments: 
+  - name: --id
+    type: string
+    description: Sample ID
+  - name: --input
+    type: file 
+    required: true
+    description: Path to input alignment file in BAM format
+  - name: --gtf_annotation
+    type: file 
+    required: true
+    description: Path to GTF annotation file.
+  - name: --paired
+    type: boolean_true
+    description: Add flag if input alignment file consists of paired reads
+  - name: --strandedness
+    type: integer
+    choices: [0, 1, 2]
+    description: |
+      Strandedness of input bam file reads (1 = forward, 2 = reverse or 0 = unstranded (default, 
+      applicable to paired reads))
+    example: 0
+    
+- name: Output
+  arguments: 
+  - name: --output_dupmatrix
+    type: file
+    direction: output
+    description: path to output file (txt) of duplicate tag counts
+    example: dup_matrix.txt
+  - name: --output_dup_intercept_mqc
+    type: file
+    direction: output
+    description: path to output file (txt) of multiqc intercept value DupRadar
+    example: dup_intercept_mqc.txt
+  - name: --output_duprate_exp_boxplot
+    type: file
+    direction: output
+    description: |
+      Path to output file (pdf) of distribution of expression box plot
+  - name: --output_duprate_exp_densplot
+    type: file
+    direction: output
+    description: |
+      Path to output file (pdf) of 2D density scatter plot of duplicate tag counts
+    example: duprate_exp_densityplot.pdf
+  - name: --output_duprate_exp_denscurve_mqc
+    type: file
+    direction: output
+    description: |
+      Path to output file (pdf) of density curve of gene duplication multiqc
+    example: duprate_exp_density_curve_mqc.txt
+  - name: --output_expression_histogram
+    type: file
+    direction: output
+    description: |
+      Path to output file (pdf) of distribution of RPK values per gene histogram
+    example: expression_hist.pdf
+  - name: --output_intercept_slope
+    type: file
+    direction: output
+    description: output file (txt) with progression of duplication rate values
+    example: intercept_slope.txt
+
+resources:
+  # Adapted from https://github.com/nf-core/rnaseq/blob/3.12.0/bin/dupradar.r
+  - type: r_script
+    path: dupradar.r
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - type: file
+    path: test_data
+    
+engines:
+  - type: docker
+    image: ubuntu:22.04
+    setup:   
+      - type: apt
+        packages: [ r-base ]
+      - type: r
+        bioc: [ dupRadar ]
+        packages: [ parallel, rlang ]
+      - type: docker
+        run: |
+          echo "DupRadar: $(Rscript -e "library(dupRadar); cat(as.character(packageVersion('dupRadar')))")" > /var/software_versions.txt
+        
+runners:
+  - type: executable  
+  - type: nextflow
diff --git a/src/dupradar/dupradar.r b/src/dupradar/dupradar.r
new file mode 100755
index 00000000..f813e902
--- /dev/null
+++ b/src/dupradar/dupradar.r
@@ -0,0 +1,192 @@
+#!/usr/bin/env Rscript
+
+## VIASH START
+par <- list(
+  "input" = 'test_data/sample.bam',
+  "id" = 'test',
+  "gtf_annotation" = 'test_data/genes.gtf',
+  "strandedness" = 1,
+  "paired" = TRUE,
+  "threads" = 1,
+  "output_dupmatrix" = "dup_matrix.txt",
+  "output_dup_intercept_mqc" = "dup_intercept_mqc.txt",
+  "output_duprate_exp_boxplot" = "duprate_exp_boxplot.pdf",
+  "output_duprate_exp_densplot" = "duprate_exp_densityplot.pdf",
+  "output_duprate_exp_denscurve_mqc" = "duprate_exp_density_curve_mqc.pdf",
+  "output_expression_histogram" = "expression_hist.pdf",
+  "output_intercept_slope" = "intercept_slope.txt"
+)
+## VIASH END
+
+library(rlang)
+
+if (length(par) < 5) {
+    stop("Usage: dupRadar.r <input.bam> <sample_id> <annotation.gtf> <strandDirection:0=unstranded/1=forward/2=reverse> <paired/single> <nbThreads> <R-package-location (optional)>", call.=FALSE)
+}
+
+
+input_bam <- par$input
+output_prefix <- par$id
+annotation_gtf <- par$gtf_annotation
+stranded <- par$strandedness %||% 0L
+paired_end <- ifelse(par$paired, TRUE, FALSE)
+threads <- par$threads %||% 1L
+
+output_dupmatrix <- par$output_dupmatrix %||% paste0(output_prefix, "_dupMatrix.txt")
+output_dup_intercept_mqc <- par$output_dup_intercept_mqc %||% paste0(output_prefix, "_dup_intercept_mqc.txt")
+output_duprate_exp_boxplot <- par$output_duprate_exp_boxplot %||% paste0(output_prefix, "_duprateExpBoxplot.pdf")
+output_duprate_exp_densplot <- par$output_duprate_exp_densplot %||% paste0(output_prefix, "_duprate_exp_densplot.pdf")
+output_duprate_exp_denscurve_mqc <- par$output_duprate_exp_denscurve_mqc %||% paste0(output_prefix, "_duprateExpDensCurve_mqc.txt")
+output_expression_histogram <- par$output_expression_histogram %||% paste0(output_prefix, "_expressionHist.pdf")
+output_intercept_slope <- par$output_intercept_slope %||% paste0(output_prefix, "_intercept_slope.txt")
+
+bamRegex <- "(.+)\\.bam$"
+
+
+if(!(grepl(bamRegex, input_bam) && file.exists(input_bam) && (!file.info(input_bam)$isdir))) stop("First argument '<input.bam>' must be an existing file (not a directory) with '.bam' extension...")
+if(!(file.exists(annotation_gtf) &&  (!file.info(annotation_gtf)$isdir))) stop("Third argument '<annotation.gtf>' must be an existing file (and not a directory)...")
+if(is.na(stranded) || (!(stranded %in% (0:2)))) stop("Fourth argument <strandDirection> must be a numeric value in 0(unstranded)/1(forward)/2(reverse)...")
+if(is.na(threads) || (threads<=0)) stop("Fifth argument <nbThreads> must be a strictly positive numeric value...")
+
+# Debug messages (stderr)
+message("Input bam      (Arg 1): ", input_bam)
+message("Output basename(Arg 2): ", output_prefix)
+message("Input gtf      (Arg 3): ", annotation_gtf)
+message("Strandness     (Arg 4): ", c("unstranded", "forward", "reverse")[stranded])
+message("paired_end     (Arg 5): ", paired_end)
+message("Nb threads     (Arg 6): ", threads)
+message("R package loc. (Arg 7): ", ifelse(length(args) > 4, args[5], "Not specified"))
+
+
+# Load / install packages
+if (length(args) > 5) { .libPaths( c( args[6], .libPaths() ) ) }
+if (!require("dupRadar")) {
+  if (!requireNamespace("BiocManager", quietly = TRUE)) {
+    install.packages("BiocManager")
+  }
+  BiocManager::install("dupRadar", update = TRUE, ask=FALSE)
+  library("dupRadar")
+}
+if (!require("parallel")) {
+    library("parallel")
+}
+
+
+# Duplicate stats
+dm <- analyzeDuprates(input_bam, annotation_gtf, stranded, paired_end, threads)
+print("analyzeDuprates done")
+write.table(dm, file=output_dupmatrix, quote=F, row.name=F, sep="\t")
+print("write.table done")
+
+# 2D density scatter plot
+pdf(output_duprate_exp_densplot)
+print("pdf done")
+duprateExpDensPlot(DupMat=dm)
+title("Density scatter plot")
+mtext(output_prefix, side=3)
+dev.off()
+print("duprateExpDensPlot done")
+fit <- duprateExpFit(DupMat=dm)
+print("duprateExpFit done")
+cat(
+    paste("- dupRadar Int (duprate at low read counts):", fit$intercept),
+    paste("- dupRadar Sl (progression of the duplication rate):", fit$slope),
+    fill=TRUE, labels=output_prefix,
+    file=output_intercept_slope, append=FALSE
+)
+
+# Create a multiqc file dupInt
+sample_name <- gsub("Aligned.sortedByCoord.out.markDups", "", output_prefix)
+line <- "#id: DupInt
+#plot_type: 'generalstats'
+#pconfig:
+#    dupRadar_intercept:
+#        title: 'dupInt'
+#        namespace: 'DupRadar'
+#        description: 'Intercept value from DupRadar'
+#        max: 100
+#        min: 0
+#        scale: 'RdYlGn-rev'
+#        format: '{:.2f}%'
+Sample dupRadar_intercept"
+
+write(line,file=output_dup_intercept_mqc, append=TRUE)
+write(paste(sample_name, fit$intercept),file=output_dup_intercept_mqc, append=TRUE)
+print("write dup_intercept_mqc done")
+
+# Get numbers from dupRadar GLM
+curve_x <- sort(log10(dm$RPK))
+curve_y <- 100*predict(fit$glm, data.frame(x=curve_x), type="response")
+# Remove all of the infinite values
+infs = which(curve_x %in% c(-Inf,Inf))
+curve_x <- curve_x[-infs]
+curve_y <- curve_y[-infs]
+# Reduce number of data points
+curve_x <- curve_x[seq(1, length(curve_x), 10)]
+curve_y <- curve_y[seq(1, length(curve_y), 10)]
+# Convert x values back to real counts
+curve_x <- 10^curve_x
+# Write to file
+line <- "#id: dupradar
+#section_name: 'DupRadar'
+#section_href: 'bioconductor.org/packages/release/bioc/html/dupRadar.html'
+#description: \"provides duplication rate quality control for RNA-Seq datasets. Highly expressed genes can be expected to have a lot of duplicate reads, but high numbers of duplicates at low read counts can indicate low library complexity with technical duplication.
+#    This plot shows the general linear models - a summary of the gene duplication distributions. \"
+#pconfig:
+#    title: 'DupRadar General Linear Model'
+#    xLog: True
+#    xlab: 'expression (reads/kbp)'
+#    ylab: '% duplicate reads'
+#    ymax: 100
+#    ymin: 0
+#    tt_label: '<b>{point.x:.1f} reads/kbp</b>: {point.y:,.2f}% duplicates'
+#    xPlotLines:
+#        - color: 'green'
+#          dashStyle: 'LongDash'
+#          label:
+#                style: {color: 'green'}
+#                text: '0.5 RPKM'
+#                verticalAlign: 'bottom'
+#                y: -65
+#          value: 0.5
+#          width: 1
+#        - color: 'red'
+#          dashStyle: 'LongDash'
+#          label:
+#                style: {color: 'red'}
+#                text: '1 read/bp'
+#                verticalAlign: 'bottom'
+#                y: -65
+#          value: 1000
+#          width: 1"
+
+write(line,file=output_duprate_exp_denscurve_mqc, append=TRUE)
+write.table(
+    cbind(curve_x, curve_y),
+    file=output_duprate_exp_denscurve_mqc,
+    quote=FALSE, row.names=FALSE, col.names=FALSE, append=TRUE,
+)
+print("write duprateExpDensCurve_mqc done")
+# Distribution of expression box plot
+pdf(output_duprate_exp_boxplot)
+duprateExpBoxplot(DupMat=dm)
+title("Percent Duplication by Expression")
+mtext(output_prefix, side=3)
+dev.off()
+print("duprateExpBoxplot done")
+# Distribution of RPK values per gene
+pdf(output_expression_histogram)
+expressionHist(DupMat=dm)
+title("Distribution of RPK values per gene")
+mtext(output_prefix, side=3)
+dev.off()
+print("expressionHist done")
+
+
+# 
+
+# Print sessioninfo to standard out
+print(output_prefix)
+# citation("dupRadar")
+# print("citation done")
+# sessionInfo()
diff --git a/src/dupradar/help.txt b/src/dupradar/help.txt
new file mode 100644
index 00000000..819c352d
--- /dev/null
+++ b/src/dupradar/help.txt
@@ -0,0 +1,583 @@
+---
+title: "Using the dupRadar package"
+author: "Sergi Sayols, Holger Klein"
+date: "June 23, 2015"
+output:
+    BiocStyle::html_document:
+    toc: true
+---
+<!--
+% \VignetteEngine{knitr::rmarkdown}
+% \VignetteIndexEntry{Using dupRadar}
+-->
+
+```{r pre,echo=FALSE,results='hide'}
+library(knitr)
+opts_chunk$set(warning=FALSE,message=FALSE,cache=TRUE)
+```
+```{r style,echo=FALSE,results='asis'}
+BiocStyle::markdown()
+```
+
+
+
+# Introduction to dupRadar
+
+RNA-Seq experiments are a common strategy nowadays to quantify the gene 
+expression levels in cells. Due to its higher sensitivity compared to 
+arrays and the ability to discover novel features, makes them the choice for 
+most modern experiments. 
+
+In RNA-Seq - as in all other NGS applications - quality control is essential.
+Current NGS workflows usually involve PCR steps at some point, which involves
+the danger of PCR artefacts and over-amplification of reads.
+For common DNA-based assays PCR duplicates are often simply removed before 
+further analysis and their overall fraction or the read multiplicity taken 
+as quality metrics.
+In RNA-Seq however, apart from the technical PCR duplicates, there is a strong
+source for biological read duplicates: for a given gene length and sequencing 
+depth there exists an expression level beyond which it is not possible to add
+place more reads inside a gene locus without placing a second read exactly 
+on the position of an already existing read.
+For this reason the overall duplication rate is not a useful measure for 
+RNA-Seq.
+
+As in the NGS/RNA-Seq QC ecosystem there were no suitable tools to address this
+question, we set out to develop a new tool. The `r Rpackage("dupRadar")` 
+package gives an insight into the duplication problem by graphically 
+relating the gene expression level and the duplication rate present on it. 
+Thus, failed experiments can be easily identified at a glance.
+
+
+**Note: by now RNA-Seq protocols have matured so that for bulk RNA protocols data 
+rarely suffer from technical duplicates. With newer low-input or single cell 
+RNA-Seq protocols technical duplicates possible problems are worth to be 
+checked for by default, especially if protocols are pushed to or beyond 
+their boundaries.
+Paired-end libraries make the distinction between duplicates due to highly 
+expressed genes and PCR duplicates easier, but the problem itself is not 
+completely solved, especially at higher sequencing depths.**
+
+# Getting started using dupRadar
+
+## Preparing your data
+
+Previous to running the duplication rate analysis, the BAM file with your
+mapped reads has to be duplicate marked with a like Picard, or the faster 
+BamUtil dedup BioBamBam. 
+The `r Rpackage("dupRadar")` package only works with duplicate marked BAM files.
+
+If you do not have/want a duplication marking step in your default pipeline, 
+the `r Rpackage("dupRadar")` package includes, for your convenience, 
+wrappers to properly call some of these tools from your R session. Note that
+you still have to supply the path of the dupmarker installation though:
+
+```{r loadLibrary}
+library(dupRadar)
+```
+
+Now, simply call the wrapper:
+
+```{r eval=FALSE}
+# call the duplicate marker and analyze the reads
+bamDuprm <- markDuplicates(dupremover="bamutil",
+			   bam="test.bam",
+	    	   path="/opt/bamUtil-master/bin",
+			   rminput=FALSE)
+```
+
+Simply specify which tool to use, the path where this tool is installed, 
+and the input bam file to be analyzed. After marking duplicates, it's safe 
+to remove to original BAM file in order to save space.
+
+The `r Rpackage("dupRadar")` package currently comes with support for:
+
+- [Picard MarkDuplicates](http://broadinstitute.github.io/picard)
+- [BamUtil](http://genome.sph.umich.edu/wiki/BamUtil)
+
+After the BAM file is marked for duplicates, dupRadar is ready to 
+analyze how the duplication rate is related with the estimated gene expression 
+levels.
+
+## A GTF file
+
+Unless there is any specific reason, dupRadar can use the same GTF file 
+that will be later used to count the reads falling on features.
+
+A valid GTF file can be obtained from UCSC, Ensembl, the iGenomes or other 
+projects.
+
+Note that the resulting duplication rate plots depend on the GTF annotation 
+used. GTF files from the gencode projects result in a less clear picture of 
+duplication rates, as there are many more features and feature types 
+annotated, which overlap heavily as well. In some cases creating the plots 
+only using subsets of gencode annotation files (e.g. just protein 
+coding genes) serve the QC purpose of this tool better.
+
+## AnnotationHub as a source of GTF files
+
+The Bioconductor `r Rpackage("AnnotationHub")` package provides an alternative
+approach to obtain annotation in GTF format from entities such as Ensembl, UCSC,
+ENCODE, and 1000 Genomes projects.
+
+This is partly outlined in the AnnotationHub 'HOWTO' vignette
+section "Ensembl GTF and FASTA files for TxDb gene models and sequence
+queries"; for the Takifugu example, the downloaded GTF file is
+available from the cache.
+
+```{r}
+if(suppressWarnings(require(AnnotationHub))) {
+    ah = AnnotationHub()
+    query(ah, c("ensembl", "80", "Takifugu", "gtf"))    # discovery
+    cache(ah["AH47101"])                                # retrieve / file path
+}
+```
+
+# dupRate demo data
+
+In the package we include two precomputed duplication matrices for two RNASeq
+experiments used as examples of a good and a failed (in terms of high redundancy
+of reads) experiments. The experiments come from the ENCODE project, as a source
+of a wide variety of protocols, library types and sequencing facilities.
+
+Load the example dataset with:
+
+```{r}
+attach(dupRadar_examples)
+```
+
+# The duplication rate analysis
+
+The analysis requires some info about the sequencing run and library 
+preparation protocol:
+
+- The strandess information about the reads: is the sequencing strand 
+specific? if so, are the reads reversely sequenced?
+- Are the reads paired, or single?
+
+Due to its phenomenal performance, internally we use the featureCounts() 
+function from the Bioconductor `r Biocpkg("Rsubread")` package, which also
+supports multiple threads.
+
+```{r eval=FALSE}
+# The call parameters: 
+bamDuprm <- "test_duprm.bam"	# the duplicate marked bam file
+gtf <- "genes.gtf"	# the gene model
+stranded <- 2		# '0' (unstranded), '1' (stranded) and '2' (reversely stranded)
+paired   <- FALSE	# is the library paired end?
+threads  <- 4		# number of threads to be used
+
+# Duplication rate analysis
+dm <- analyzeDuprates(bamDuprm,gtf,stranded,paired,threads)
+```
+
+The duplication matrix contains read counts in different scenarios and RPK and RPKM values for every gene.
+
+|ID     | geneLength| allCountsMulti| filteredCountsMulti| dupRateMulti| dupsPerIdMulti|     RPKMulti|  RPKMMulti| allCounts| filteredCounts|   dupRate| dupsPerId|         RPK|       RPKM|
+|:------|----------:|--------------:|-------------------:|------------:|--------------:|------------:|----------:|---------:|--------------:|---------:|---------:|-----------:|----------:|
+|Xkr4   |       3634|              2|                   2|    0.0000000|              0|    0.5503577|  0.0117159|         2|              2| 0.0000000|         0|   0.5503577|  0.0117159|
+|Rp1    |       9747|              0|                   0|          NaN|              0|    0.0000000|  0.0000000|         0|              0|       NaN|         0|   0.0000000|  0.0000000|
+|Sox17  |       3130|              0|                   0|          NaN|              0|    0.0000000|  0.0000000|         0|              0|       NaN|         0|   0.0000000|  0.0000000|
+|Mrpl15 |       4203|            429|                 261|    0.3916084|            168|  102.0699500|  2.1728436|       419|            258| 0.3842482|       161|  99.6906971|  2.1221946|
+|Lypla1 |       2433|           1106|                 576|    0.4792043|            530|  454.5828196|  9.6770634|      1069|            562| 0.4742750|       507| 439.3752569|  9.3533280|
+|Tcea1  |       2847|           2890|                1046|    0.6380623|           1844| 1015.1036178| 21.6093121|      1822|            696| 0.6180022|      1126| 639.9719002| 13.6235871|
+
+# Plotting and interpretation
+
+The number of reads per base assigned to a gene in an ideal RNA-Seq data set 
+is expected to be proportional to the abundance of its transcripts in the 
+sample. For lowly expressed genes we expect read duplication to happen rarely by
+chance, while for highly expressed genes - depending on the total sequencing
+depth - we expect read duplication to happen often.
+
+A good way to learn if a dataset is following this trend is by relating the 
+normalized number of counts per gene (RPK, as a quantification of the gene 
+expression) and the fraction represented by duplicated reads.
+
+The `r Rpackage("dupRadar")` offers several functions to assess this 
+relationship. The most prominent may be the 2d density scatter plot:
+
+```{r fig.width=14,fig.height=7}
+## make a duprate plot (blue cloud)
+par(mfrow=c(1,2))
+duprateExpDensPlot(DupMat=dm)		# a good looking plot
+title("good experiment")
+duprateExpDensPlot(DupMat=dm.bad)	# a dataset with duplication problems
+title("duplication problems")
+
+## duprate boxplot
+duprateExpBoxplot(DupMat=dm)		# a good looking plot
+duprateExpBoxplot(DupMat=dm.bad)	# a dataset with duplication problems
+```
+
+The *duprateExpDensPlot* has helper lines at the threshold of 1 read/bp and at 
+0.5 RPKM. Moreover by default a generalized linear model is fit to the data and
+overplotted (see also below).
+
+## Fitting a model into the data
+
+To summarize the relationship between duplication rates and gene expression, 
+we propose fitting a logistic regression curve onto the data.  With the 
+coefficients of the fitted model, one can get an idea of the initial 
+duplication rate at low read counts (Intercept) and the progression of the 
+duplication rate along with the progression of the read counts (Slope).
+
+
+```{r fig.width=7,fig.height=7}
+duprateExpDensPlot(DupMat=dm)
+
+# or, just to get the fitted model without plot
+fit <- duprateExpFit(DupMat=dm)
+cat("duprate at low read counts: ",fit$intercept,"\n",
+	"progression of the duplication rate: ",fit$slope,fill=TRUE)
+```
+
+Our main use case for that function is the condensation
+of the plots into quality metrics that can be used for automatic flagging of 
+possibly problematic samples in large experiments or aggregation with other
+quality metrics in large tables to analyse interdependencies.
+
+
+The *duprateExpBoxplot* plot shows the range of the duplication rates at 5% 
+bins (default) along the distribution of RPK gene counts. The x-axis displays 
+the quantile of the RPK distribution, and the average RPK of the genes 
+contained in this quantile.
+
+Individual genes can be identified in the plot:
+
+```{r eval=FALSE}
+## INTERACTIVE: identify single points on screen (name="ID" column of dm)
+duprateExpPlot(DupMat=dm)		# a good looking plot
+duprateExpIdentify(DupMat=dm)
+```
+
+One can also call the function *duprateExpPlot* to get smooth color density 
+representation of the same data:
+
+```{r fig.width=14,fig.height=7}
+par(mfrow=c(1,2))
+cols <- colorRampPalette(c("black","blue","green","yellow","red"))
+duprateExpPlot(DupMat=dm,addLegend=FALSE)
+duprateExpPlot(DupMat=dm.bad,addLegend=FALSE,nrpoints=10,nbin=500,colramp=cols)
+```
+
+Any further parm sent to the *duprateExpPlot* is also sent to the 
+*smoothScatter* function.
+
+## Comparing the fitted parameters to other datasets
+
+The parameters of the fitted model may mean very little (or just nothing) for
+many, unless there's other data to compare with. We provide the pre-computed
+duplication matrices for all the RNA-Seq experiments publicly available from 
+the ENCODE project, for human and mouse.
+
+![alt text](figures/encode.svg)
+
+With the experience from the ENCODE datasets, we expect from single read 
+experiments little duplication at low RPKM (low **intercept**) rapidly rising 
+because of natural duplication (high **slope**). In contrast, paired-end
+experiments have a more mild rising of the natural duplication (low **slope**)
+due to having higher diversity of reads pairs since pairs with same start may
+still have different end.
+
+The common denominator for both designs is the importance of having a low
+intercept, suggesting that duplication rate at lowly expressed genes may serve
+as a quality measure.
+
+All the pre-computed duplication matrices are available in the [dupRadar Github site](https://github.com/ssayols/dupRadar).
+
+
+## Other plots
+
+**CAVEAT: Sometimes in discussions duplicate reads (i.e. two physically 
+different reads are mapped to the exact same position) and multi-mapping 
+reads (i.e. a single read is mapped to two or more locations in the genome)
+are mixed up. `r Rpackage("dupRadar")`'s main focus are PCR duplicates in RNA-Seq.
+However internally we keep track of unique mappers and multimappers, and we use
+both in some of the examples, to illustrate use cases of our package beyond
+the main aim. Multi-mapping reads are completely independent of PCR duplicates.**
+
+Apart from the plots relating RPK and duplication rate, the 
+`r Rpackage("dupRadar")` package provides some other useful plots to extract 
+information about the gene expression levels.
+
+An interesting quality metric are the fraction of reads taken up by groups of 
+genes binned by 5% expression levels. 
+```{r fig.width=7,fig.height=7}
+readcountExpBoxplot(DupMat=dm)
+```
+
+In the example we see that the 5% of 
+highest expressed genes in our sample data set take up around 60% of all reads.
+
+The distribution of RPK values per genes can be plotted with:
+
+```{r fig.width=7,fig.height=7}
+expressionHist(DupMat=dm)
+```
+
+This would help in identifying skewed distributions with unusual amount of 
+lowly expressed genes, or to detect no consensus between replicates.
+
+
+# Other information deduced from the data
+
+The duplication rate matrix calculated by the function *analyzeDuprates()* 
+contains some useful information about the sequencing experiment, that can be 
+used to assess the quality of the data.
+
+## Fraction of multimappers per gene
+
+Analogous to per gene duplication rate, the fraction of mutimappers 
+can be easily calculated fom the duplication matrix.
+
+Taking the counts from the column *allCountsMulti*, and substracting form it 
+the counts from the column *allCounts*, one can get the total number of 
+multihits. Thus, the fraction of multihits per gene can be calculating then 
+dividing by *allCountsMulti*.
+
+```{r}
+# calculate the fraction of multimappers per gene
+dm$mhRate <- (dm$allCountsMulti - dm$allCounts) / dm$allCountsMulti
+# how many genes are exclusively covered by multimappers
+sum(dm$mhRate == 1, na.rm=TRUE)
+
+# and how many have an RPKM (including multimappers) > 5
+sum(dm$mhRate==1 & dm$RPKMMulti > 5, na.rm=TRUE)
+
+# and which are they?
+dm[dm$mhRate==1 & dm$RPKMMulti > 5, "ID"]
+```
+
+We can also generate an overall picture about less extreme cases:
+```{r fig.width=7,fig.height=7}
+hist(dm$mhRate, 
+     breaks=50, 
+     col="red", 
+     main="Frequency of multimapping rates", 
+     xlab="Multimapping rate per gene", 
+     ylab="Frequency")
+
+```
+
+Also the direct comparison of reads per kilobase between uniquely and 
+multimappers is possible.
+
+```{r fig.width=7,fig.height=7}
+# comparison of multi-mapping RPK and uniquely-mapping RPK
+plot(log2(dm$RPK), 
+     log2(dm$RPKMulti), 
+     xlab="Reads per kb (uniquely mapping reads only)",
+     ylab="Reads per kb (all including multimappers, non-weighted)"
+)
+```
+
+Use with identify() to annotate interesting cases interactively.
+```{r, eval=F}
+identify(log2(dm$RPK), 
+         log2(dm$RPKMulti),
+	 labels=dm$ID)
+```
+
+## Connection between possible PCR artefacts and GC content
+In some cases we wondered about influence of GC content on PCR artefacts. An
+easy way to check using our `r Rpackage("dupRadar")` package in conjunction
+with `r Rpackage("biomaRt")` is demonstrated in the following. For simplicity
+we use our demo data here in which we *do not* see a big influence.
+
+
+```{r eval=F}
+library(dupRadar)
+library(biomaRt)
+
+## for detailed explanations on biomaRt, please see the respective
+## vignette
+
+## set up biomart connection for mouse (needs internet connection)
+ensm <- useMart("ensembl")
+ensm <- useDataset("mmusculus_gene_ensembl", mart=ensm)
+
+## get a table which has the gene GC content for the IDs that have been
+## used to generate the table (depends on the GTF annotation that you
+## use)
+tr <- getBM(attributes=c("mgi_symbol", "percentage_gc_content"),
+            values=TRUE, mart=ensm)
+
+## create a GC vector with IDs as element names
+mgi.gc <- tr$percentage_gc_content
+names(mgi.gc) <- tr$mgi_symbol
+
+## using dm demo duplication matrix that comes with the package
+## add GC content to our demo data and keep only subset for which we can
+## retrieve data
+keep <- dm$ID %in% tr$mgi_symbol
+dm.gc <- dm[keep,]
+dm.gc$gc <- mgi.gc[dm.gc$ID]
+
+## check distribution of annotated gene GC content (in %)
+boxplot(dm.gc$gc, main="Gene GC content", ylab="% GC")
+```
+
+![alt text](figures/GCcontent.svg)
+
+Now we can compare the dependence of duplication rate on expression level
+independently for below and above median GC genes (and to mention again,
+in this data set we don't have a big difference).
+
+```{r eval=F}
+par(mfrow=c(1,2))
+
+## below median GC genes
+duprateExpDensPlot(dm.gc[dm.gc$gc<=45,], main="below median GC genes")
+
+## above median GC genes
+duprateExpDensPlot(dm.gc[dm.gc$gc>=45,], main="above median GC genes")
+```
+
+![alt text](figures/GCdependence.png)
+
+# Conclusion
+
+The `r Rpackage("dupRadar")` package provides a framework for the analysis of 
+duplicate rates in RNAseq datasets. In addition, it includes a set of 
+convenient wrappers to correctly call some common tools used for marking PCR 
+duplicated reads in the BAM file. It's shipped as Bioconductor package in 
+order to offer a common framework for all the tools involved, and simplify its 
+use.
+
+# Including dupRadar into pipelines
+
+To include dupRadar as a single step in an RNA-Seq pipeline, integration into a short R-script can be done like in the following:
+
+```{r,eval=F}
+#!/usr/bin/env Rscript
+
+########################################
+##
+## dupRadar shell script
+## call dupRadar R package from the shell for 
+## easy integration into pipelines
+##
+## Holger Klein & Sergi Sayols
+##
+## https://github.com/ssayols/dupRadar
+##
+## input:
+## - _duplicate marked_ bam file
+## - gtf file
+## - parameters for duplication counting routine:
+##   stranded, paired, outdir, threads.
+##
+########################################
+
+library(dupRadar)
+
+####################
+##
+## get name patterns from command line
+##
+args   <- commandArgs(TRUE)
+
+## the bam file to analyse
+bam <- args[1]
+## usually, same GTF file as used in htseq-count
+gtf <- gsub("gtf=","",args[2])
+## no|yes|reverse
+stranded <- gsub("stranded=","",args[3])
+## is a paired end experiment
+paired   <- gsub("paired=","",args[4])
+## output directory
+outdir   <- gsub("outdir=","",args[5])
+## number of threads to be used
+threads  <- as.integer(gsub("threads=","",args[6]))
+
+if(length(args) != 6) { 
+  stop (paste0("Usage: ./dupRadar.sh <file.bam> <genes.gtf> ",
+			   "<stranded=[no|yes|reverse]> paired=[yes|no] ",
+			   "outdir=./ threads=1"))
+}
+
+if(!file.exists(bam)) { 
+  stop(paste("File",bam,"does NOT exist"))
+}
+
+if(!file.exists(gtf)) {
+  stop(paste("File",gtf,"does NOT exist"))
+}
+
+if(!file.exists(outdir)) { 
+  stop(paste("Dir",outdir,"does NOT exist"))
+}
+
+if(is.na(stranded) | !(grepl("no|yes|reverse",stranded))) {
+  stop("Stranded has to be no|yes|reverse")
+}
+
+if(is.na(paired) | !(grepl("no|yes",paired))) {
+  stop("Paired has to be no|yes")
+}
+
+if(is.na(threads)) {
+  stop("Threads has to be an integer number")
+}
+
+stranded <- if(stranded == "no") 0 else if(stranded == "yes") 1 else 2
+
+## end command line parsing
+##
+########################################
+
+########################################
+##
+## analyze duprates and create plots
+##
+cat("Processing file ", bam, " with GTF ", gtf, "\n")
+
+## calculate duplication rate matrix
+dm <- analyzeDuprates(bam,
+                      gtf,
+                      stranded,
+                      (paired == "yes"),
+                      threads)
+
+## produce plots
+
+## duprate vs. expression smooth scatter
+png(file=paste0(outdir,"/",gsub("(.*)\\.[^.]+","\\1",basename(bam)),"_dupRadar_drescatter.png"),
+    width=1000, height=1000)
+duprateExpDensPlot(dm, main=basename(bam))
+dev.off()
+
+## expression histogram
+png(file=paste0(outdir,"/",gsub("(.*)\\.[^.]+","\\1",basename(bam)),"_dupRadar_ehist.png"),
+    width=1000, height=1000)
+expressionHist(dm)
+dev.off()
+
+## duprate vs. expression boxplot
+png(file=paste0(outdir,"/",gsub("(.*)\\.[^.]+","\\1",basename(bam)),"_dupRadar_drebp.png"),
+    width=1000, height=1000)
+par(mar=c(10,4,4,2)+.1)
+duprateExpBoxplot(dm, main=basename(bam))
+dev.off()
+```
+
+## Citing dupRadar
+
+Please consider citing dupRadar if used in support of your own research:
+
+```{r citation}
+citation("dupRadar")
+```
+
+## Reporting problems or bugs
+
+If you run into problems using dupRadar, the [Bioconductor Support site](https://support.bioconductor.org/) is a good first place to ask for help. If you think there is a bug or an unreported feature, you can report it using the [dupRadar Github site](https://github.com/ssayols/dupRadar).
+
+# Session info
+
+The following package and versions were used in the production of this vignette.
+
+```{r echo=FALSE}
+sessionInfo()
+```
\ No newline at end of file
diff --git a/src/dupradar/test.sh b/src/dupradar/test.sh
new file mode 100755
index 00000000..99610357
--- /dev/null
+++ b/src/dupradar/test.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+
+# define input and output for script
+input_bam="${meta_resources_dir}/test_data/sample.bam"
+input_gtf="${meta_resources_dir}/test_data/genes.gtf"
+
+output_dupmatrix="dup_matrix.txt"
+output_dup_intercept_mqc="dup_intercept_mqc.txt"
+output_duprate_exp_boxplot="duprate_exp_boxplot.pdf"
+output_duprate_exp_densplot="duprate_exp_densityplot.pdf"
+output_duprate_exp_denscurve_mqc="duprate_exp_density_curve_mqc.txt"
+output_expression_histogram="expression_hist.pdf"
+output_intercept_slope="intercept_slope.txt"
+
+# Run executable
+echo "> Running $meta_functionality_name for unpaired reads, writing to tmpdir $tmpdir."
+
+"$meta_executable" \
+    --input "$input_bam" \
+    --id "test" \
+    --gtf_annotation "$input_gtf" \
+    --strandedness 1 \
+    --output_dupmatrix $output_dupmatrix \
+    --output_dup_intercept_mqc $output_dup_intercept_mqc \
+    --output_duprate_exp_boxplot $output_duprate_exp_boxplot \
+    --output_duprate_exp_densplot $output_duprate_exp_densplot \
+    --output_duprate_exp_denscurve_mqc $output_duprate_exp_denscurve_mqc \
+    --output_expression_histogram $output_expression_histogram \
+    --output_intercept_slope $output_intercept_slope
+
+exit_code=$?
+[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1
+
+echo ">> asserting output has been created for paired read input"
+[ ! -f "$output_dupmatrix" ] && echo "$output_dupmatrix was not created" && exit 1
+[ ! -s "$output_dupmatrix" ] && echo "$output_dupmatrix is empty" && exit 1
+[ ! -f "$output_dup_intercept_mqc" ] && echo "$output_dup_intercept_mqc was not created" && exit 1
+[ ! -s "$output_dup_intercept_mqc" ] && echo "$output_dup_intercept_mqc is empty" && exit 1
+[ ! -f "$output_duprate_exp_boxplot" ] && echo "$output_duprate_exp_boxplot was not created" && exit 1
+[ ! -s "$output_duprate_exp_boxplot" ] && echo "$output_duprate_exp_boxplot is empty" && exit 1
+[ ! -f "$output_duprate_exp_densplot" ] && echo "$output_duprate_exp_densplot was not created" && exit 1
+[ ! -s "$output_duprate_exp_densplot" ] && echo "$output_duprate_exp_densplot is empty" && exit 1
+[ ! -f "$output_duprate_exp_denscurve_mqc" ] && echo "$output_duprate_exp_denscurve_mqc was not created" && exit 1
+[ ! -s "$output_duprate_exp_denscurve_mqc" ] && echo "$output_duprate_exp_denscurve_mqc is empty" && exit 1
+[ ! -f "$output_expression_histogram" ] && echo "$output_expression_histogram was not created" && exit 1
+[ ! -s "$output_expression_histogram" ] && echo "$output_expression_histogram is empty" && exit 1
+[ ! -f "$output_intercept_slope" ] && echo "$output_intercept_slope was not created" && exit 1
+[ ! -s "$output_intercept_slope" ] && echo "$output_intercept_slope is empty" && exit 1
+
+
+echo ">> Check if output is empty"
+[ ! -s "$output_dupmatrix" ] \
+    && echo "Output file $output_dupmatrix is empty" && exit 1
+[ ! -s "$output_dup_intercept_mqc" ] \
+    && echo "Output file $output_dup_intercept_mqc is empty" && exit 1
+[ ! -s "$output_intercept_slope" ] \
+    && echo "Output file $output_intercept_slope is empty" && exit 1
+
+echo ">> Check if output is correct"
+# diff ignoring white spaces
+diff -B -b "$meta_resources_dir/test_data/test_dupMatrix.txt" "${meta_resources_dir}/test_data/test_dupMatrix.txt" || \
+    { echo "Output file $output_dupmatrix is not correct"; exit 1; }
+diff -B -b "$output_duprate_exp_denscurve_mqc" "${meta_resources_dir}/test_data/test_duprateExpDensCurve_mqc.txt" || \
+    { echo "Output file $output_duprate_exp_denscurve_mqc is not correct"; exit 1; }
+diff -B -b "$output_intercept_slope" "${meta_resources_dir}/test_data/test_intercept_slope.txt" || \
+    { echo "Output file $output_intercept_slope is not correct"; exit 1; }
+
+
+echo ">>> Test 2: Example without specified output files"
+
+"$meta_executable" \
+    --input "$input_bam" \
+    --id "test" \
+    --gtf_annotation "$input_gtf" \
+    --strandedness 1
+
+exit_code=$?
+[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1
+
+echo ">> asserting output has been created for paired read input"
+# output files with original names from DupRadar and id prefix
+[ ! -f "test_dupMatrix.txt" ] && echo "test_dupMatrix.txt was not created" && exit 1
+[ ! -s "test_dupMatrix.txt" ] && echo "test_dupMatrix.txt is empty" && exit 1
+[ ! -f "test_dup_intercept_mqc.txt" ] && echo "test_dup_intercept_mqc.txt was not created" && exit 1
+[ ! -s "test_dup_intercept_mqc.txt" ] && echo "test_dup_intercept_mqc.txt is empty" && exit 1
+[ ! -f "test_duprateExpBoxplot.pdf" ] && echo "test_duprateExpBoxplot.pdf was not created" && exit 1
+[ ! -s "test_duprateExpBoxplot.pdf" ] && echo "test_duprateExpBoxplot.pdf is empty" && exit 1
+[ ! -f "test_duprate_exp_densplot.pdf" ] && echo "test_duprate_exp_densplot.pdf was not created" && exit 1
+[ ! -s "test_duprate_exp_densplot.pdf" ] && echo "test_duprate_exp_densplot.pdf is empty" && exit 1
+[ ! -f "test_duprateExpDensCurve_mqc.txt" ] && echo "test_duprateExpDensCurve_mqc.txt was not created" && exit 1
+[ ! -s "test_duprateExpDensCurve_mqc.txt" ] && echo "test_duprateExpDensCurve_mqc.txt is empty" && exit 1
+[ ! -f "test_expressionHist.pdf" ] && echo "test_expressionHist.pdf was not created" && exit 1
+[ ! -s "test_expressionHist.pdf" ] && echo "test_expressionHist.pdf is empty" && exit 1
+[ ! -f "test_intercept_slope.txt" ] && echo "test_intercept_slope.txt was not created" && exit 1
+[ ! -s "test_intercept_slope.txt" ] && echo "test_intercept_slope.txt is empty" && exit 1
+
+
+echo ">> Check if output is correct"
+diff -B -b "test_dupMatrix.txt" "${meta_resources_dir}/test_data/test_dupMatrix.txt" || \
+    { echo "Output file test_dupMatrix.txt is not correct"; exit 1; }
+diff -B -b "test_duprateExpDensCurve_mqc.txt" "${meta_resources_dir}/test_data/test_duprateExpDensCurve_mqc.txt" || \
+    { echo "Output file test_duprateExpDensCurve_mqc.txt is not correct"; exit 1; }
+diff -B -b "test_intercept_slope.txt" "${meta_resources_dir}/test_data/test_intercept_slope.txt" || \
+    { echo "Output file test_intercept_slope.txt is not correct"; exit 1; }
+
+echo ">>> All tests passed"
+exit 0
\ No newline at end of file
diff --git a/src/dupradar/test_data/genes.gtf b/src/dupradar/test_data/genes.gtf
new file mode 100644
index 00000000..423ba8f8
--- /dev/null
+++ b/src/dupradar/test_data/genes.gtf
@@ -0,0 +1,36 @@
+chr1	unknown	exon	14362	14829	.	-	.	gene_id "WASH7P"; gene_name "WASH7P"; transcript_id "NR_024540"; tss_id "TSS7514";
+chr1	unknown	exon	14970	15038	.	-	.	gene_id "WASH7P"; gene_name "WASH7P"; transcript_id "NR_024540"; tss_id "TSS7514";
+chr1	unknown	exon	15796	15947	.	-	.	gene_id "WASH7P"; gene_name "WASH7P"; transcript_id "NR_024540"; tss_id "TSS7514";
+chr1	unknown	exon	16607	16765	.	-	.	gene_id "WASH7P"; gene_name "WASH7P"; transcript_id "NR_024540"; tss_id "TSS7514";
+chr1	unknown	exon	16858	17055	.	-	.	gene_id "WASH7P"; gene_name "WASH7P"; transcript_id "NR_024540"; tss_id "TSS7514";
+chr1	unknown	exon	17233	17368	.	-	.	gene_id "WASH7P"; gene_name "WASH7P"; transcript_id "NR_024540"; tss_id "TSS7514";
+chr1	unknown	exon	17606	17742	.	-	.	gene_id "WASH7P"; gene_name "WASH7P"; transcript_id "NR_024540"; tss_id "TSS7514";
+chr1	unknown	exon	17915	18061	.	-	.	gene_id "WASH7P"; gene_name "WASH7P"; transcript_id "NR_024540"; tss_id "TSS7514";
+chr1	unknown	exon	18268	18366	.	-	.	gene_id "WASH7P"; gene_name "WASH7P"; transcript_id "NR_024540"; tss_id "TSS7514";
+chr1	unknown	exon	24738	24891	.	-	.	gene_id "WASH7P"; gene_name "WASH7P"; transcript_id "NR_024540"; tss_id "TSS7514";
+chr1	unknown	exon	29321	29370	.	-	.	gene_id "WASH7P"; gene_name "WASH7P"; transcript_id "NR_024540"; tss_id "TSS7514";
+chr1	unknown	exon	34611	35174	.	-	.	gene_id "FAM138A"; gene_name "FAM138A"; transcript_id "NR_026818_1"; tss_id "TSS8403";
+chr1	unknown	exon	34611	35174	.	-	.	gene_id "FAM138F"; gene_name "FAM138F"; transcript_id "NR_026820_1"; tss_id "TSS8403";
+chr1	unknown	exon	35277	35481	.	-	.	gene_id "FAM138A"; gene_name "FAM138A"; transcript_id "NR_026818_1"; tss_id "TSS8403";
+chr1	unknown	exon	35277	35481	.	-	.	gene_id "FAM138F"; gene_name "FAM138F"; transcript_id "NR_026820_1"; tss_id "TSS8403";
+chr1	unknown	exon	35721	36081	.	-	.	gene_id "FAM138A"; gene_name "FAM138A"; transcript_id "NR_026818_1"; tss_id "TSS8403";
+chr1	unknown	exon	35721	36081	.	-	.	gene_id "FAM138F"; gene_name "FAM138F"; transcript_id "NR_026820_1"; tss_id "TSS8403";
+chr1	unknown	CDS	69091	70005	.	+	0	gene_id "OR4F5"; gene_name "OR4F5"; p_id "P1230"; transcript_id "NM_001005484"; tss_id "TSS14428";
+chr1	unknown	exon	69091	70008	.	+	.	gene_id "OR4F5"; gene_name "OR4F5"; p_id "P1230"; transcript_id "NM_001005484"; tss_id "TSS14428";
+chr1	unknown	start_codon	69091	69093	.	+	.	gene_id "OR4F5"; gene_name "OR4F5"; p_id "P1230"; transcript_id "NM_001005484"; tss_id "TSS14428";
+chr1	unknown	stop_codon	70006	70008	.	+	.	gene_id "OR4F5"; gene_name "OR4F5"; p_id "P1230"; transcript_id "NM_001005484"; tss_id "TSS14428";
+chr1	unknown	exon	134773	139696	.	-	.	gene_id "LOC729737"; gene_name "LOC729737"; transcript_id "NR_039983"; tss_id "TSS18541";
+chr1	unknown	exon	139790	139847	.	-	.	gene_id "LOC729737"; gene_name "LOC729737"; transcript_id "NR_039983"; tss_id "TSS18541";
+chr1	unknown	exon	140075	140566	.	-	.	gene_id "LOC729737"; gene_name "LOC729737"; transcript_id "NR_039983"; tss_id "TSS18541";
+chr1	unknown	exon	323892	324060	.	+	.	gene_id "LOC100132287"; gene_name "LOC100132287"; transcript_id "NR_028322_1"; tss_id "TSS12303";
+chr1	unknown	exon	324288	324345	.	+	.	gene_id "LOC100132287"; gene_name "LOC100132287"; transcript_id "NR_028322_1"; tss_id "TSS12303";
+chr1	unknown	exon	324439	328581	.	+	.	gene_id "LOC100132287"; gene_name "LOC100132287"; transcript_id "NR_028322_1"; tss_id "TSS12303";
+chr19	unknown	exon	76220	76783	.	-	.	gene_id "FAM138F"; gene_name "FAM138F"; transcript_id "NR_026820"; tss_id "TSS16829";
+chr19	unknown	exon	76220	76783	.	-	.	gene_id "FAM138A"; gene_name "FAM138A"; transcript_id "NR_026818"; tss_id "TSS16829";
+chr19	unknown	exon	76886	77090	.	-	.	gene_id "FAM138F"; gene_name "FAM138F"; transcript_id "NR_026820"; tss_id "TSS16829";
+chr19	unknown	exon	76886	77090	.	-	.	gene_id "FAM138A"; gene_name "FAM138A"; transcript_id "NR_026818"; tss_id "TSS16829";
+chr19	unknown	exon	77330	77690	.	-	.	gene_id "FAM138F"; gene_name "FAM138F"; transcript_id "NR_026820"; tss_id "TSS16829";
+chr19	unknown	exon	77330	77690	.	-	.	gene_id "FAM138A"; gene_name "FAM138A"; transcript_id "NR_026818"; tss_id "TSS16829";
+chr5	unknown	exon	180750507	180750675	.	+	.	gene_id "LOC100132287"; gene_name "LOC100132287"; transcript_id "NR_028322"; tss_id "TSS11538";
+chr5	unknown	exon	180750903	180750960	.	+	.	gene_id "LOC100132287"; gene_name "LOC100132287"; transcript_id "NR_028322"; tss_id "TSS11538";
+chr5	unknown	exon	180751054	180755196	.	+	.	gene_id "LOC100132287"; gene_name "LOC100132287"; transcript_id "NR_028322"; tss_id "TSS11538";
diff --git a/src/dupradar/test_data/sample.bam b/src/dupradar/test_data/sample.bam
new file mode 100644
index 00000000..04d8c95e
Binary files /dev/null and b/src/dupradar/test_data/sample.bam differ
diff --git a/src/dupradar/test_data/script.sh b/src/dupradar/test_data/script.sh
new file mode 100644
index 00000000..2b1c1aa5
--- /dev/null
+++ b/src/dupradar/test_data/script.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+wget https://github.com/ssayols/dupRadar/raw/master/inst/extdata/genes.gtf
+wget https://github.com/ssayols/dupRadar/raw/master/inst/extdata/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam 
+
+# subsample the bam file
+samtools view -s 0.02 -b wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam > sample.bam
+
+rm wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam
\ No newline at end of file
diff --git a/src/dupradar/test_data/test_dupMatrix.txt b/src/dupradar/test_data/test_dupMatrix.txt
new file mode 100644
index 00000000..9035e993
--- /dev/null
+++ b/src/dupradar/test_data/test_dupMatrix.txt
@@ -0,0 +1,7 @@
+ID	geneLength	allCountsMulti	filteredCountsMulti	dupRateMulti	dupsPerIdMulti	RPKMulti	RPKMMulti	allCounts	filteredCounts	dupRate	dupsPerId	RPK	RPKM
+WASH7P	1769	41	41	0	0	23.1769361221029	188430.374976446	1	1	0	0	0.565291124929339	4595.86280430357
+FAM138A	2260	0	0	NA	0	0	0	0	0	NA	0	0	0
+FAM138F	2260	0	0	NA	0	0	0	0	0	NA	0	0	0
+OR4F5	918	0	0	NA	0	0	0	0	0	NA	0	0	0
+LOC729737	5474	18	18	0	0	3.28827183047132	26733.917320905	3	3	0	0	0.548045305078553	4455.65288681751
+LOC100132287	8740	39	39	0	0	4.46224256292906	36278.3948205615	1	1	0	0	0.11441647597254	930.215251809269
diff --git a/src/dupradar/test_data/test_duprateExpDensCurve_mqc.txt b/src/dupradar/test_data/test_duprateExpDensCurve_mqc.txt
new file mode 100644
index 00000000..ca9f199e
--- /dev/null
+++ b/src/dupradar/test_data/test_duprateExpDensCurve_mqc.txt
@@ -0,0 +1,33 @@
+#id: dupradar
+#section_name: 'DupRadar'
+#section_href: 'bioconductor.org/packages/release/bioc/html/dupRadar.html'
+#description: "provides duplication rate quality control for RNA-Seq datasets. Highly expressed genes can be expected to have a lot of duplicate reads, but high numbers of duplicates at low read counts can indicate low library complexity with technical duplication.
+#    This plot shows the general linear models - a summary of the gene duplication distributions. "
+#pconfig:
+#    title: 'DupRadar General Linear Model'
+#    xLog: True
+#    xlab: 'expression (reads/kbp)'
+#    ylab: '% duplicate reads'
+#    ymax: 100
+#    ymin: 0
+#    tt_label: '<b>{point.x:.1f} reads/kbp</b>: {point.y:,.2f}% duplicates'
+#    xPlotLines:
+#        - color: 'green'
+#          dashStyle: 'LongDash'
+#          label:
+#                style: {color: 'green'}
+#                text: '0.5 RPKM'
+#                verticalAlign: 'bottom'
+#                y: -65
+#          value: 0.5
+#          width: 1
+#        - color: 'red'
+#          dashStyle: 'LongDash'
+#          label:
+#                style: {color: 'red'}
+#                text: '1 read/bp'
+#                verticalAlign: 'bottom'
+#                y: -65
+#          value: 1000
+#          width: 1
+0.11441647597254 5.82621463896864e-09
diff --git a/src/dupradar/test_data/test_intercept_slope.txt b/src/dupradar/test_data/test_intercept_slope.txt
new file mode 100644
index 00000000..5294b8ba
--- /dev/null
+++ b/src/dupradar/test_data/test_intercept_slope.txt
@@ -0,0 +1,2 @@
+test - dupRadar Int (duprate at low read counts): 5.8262146393079e-11 
+test - dupRadar Sl (progression of the duplication rate): 0.999999999999966