diff --git a/modules/nf-core/affy/justrma/main.nf b/modules/nf-core/affy/justrma/main.nf index 18eb7c283552..956968654974 100644 --- a/modules/nf-core/affy/justrma/main.nf +++ b/modules/nf-core/affy/justrma/main.nf @@ -12,9 +12,10 @@ process AFFY_JUSTRMA { tuple val(meta2), path(description) output: - tuple val(meta), path("*.rds"), emit: rds - tuple val(meta), path("*.tsv"), emit: tsv - path "versions.yml" , emit: versions + tuple val(meta), path("*.rds") , emit: rds + tuple val(meta), path("matrix.tsv") , emit: expression + tuple val(meta), path("*.annotation.tsv") , emit: annotation, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/affy/justrma/meta.yml b/modules/nf-core/affy/justrma/meta.yml index b277c4dfa1c1..1f57087fc91a 100644 --- a/modules/nf-core/affy/justrma/meta.yml +++ b/modules/nf-core/affy/justrma/meta.yml @@ -39,11 +39,15 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test' ] - - tsv: + - expression: type: file description: TSV-format intensity matrix - pattern: "*.tsv" - - rdata: + pattern: "matrix.tsv" + - annotation: + type: file + description: TSV-format annotation table + pattern: "*.annotation.tsv" + - rds: type: file description: Serialised ExpressionSet object pattern: "*.rds" diff --git a/modules/nf-core/affy/justrma/templates/affy_justrma.R b/modules/nf-core/affy/justrma/templates/affy_justrma.R index 9411a1b0f620..d9af76c9b237 100755 --- a/modules/nf-core/affy/justrma/templates/affy_justrma.R +++ b/modules/nf-core/affy/justrma/templates/affy_justrma.R @@ -54,15 +54,31 @@ read_delim_flexible <- function(file, header = TRUE, row.names = NULL){ #' Install the right CDF for a given cel file #' #' @param celfile A valid path to a CEL file +#' @param annotation Boolean indicating whether to install the annotation +#' package #' #' @return output The CDF environment or a list detailing the failed locations. 
-install_cdf <- function(celfile){ +install_cdf_db <- function(celfile, annotation = FALSE){ library(affyio) headdetails <- read.celfile.header(celfile) ref.cdfName <- headdetails[[1]] cleaned.cdfName <- cleancdfname(ref.cdfName, addcdf = FALSE) - cdfFromBioC(paste0(cleaned.cdfName, 'cdf')) + + exts = 'cdf' + if (annotation){ + exts <- c(exts, '.db') + } + + for (package in paste0(cleaned.cdfName, exts)){ + install.packages( + package, + lib = 'libs', + repos = BiocManager::repositories(), + dependencies = c("Depends", "Imports") + ) + } + cleaned.cdfName } #' Round numeric dataframe columns to fixed decimal places by applying @@ -116,7 +132,8 @@ opt <- list( cdfname = NULL, rm.mask = FALSE, rm.outliers = FALSE, - rm.extra = FALSE + rm.extra = FALSE, + build_annotation = FALSE ) if (opt\$description == ''){ opt\$description = NULL @@ -183,7 +200,7 @@ if (! opt\$file_name_col %in% colnames(sample.sheet)){ dir.create('libs') .libPaths('libs') first_cel <- file.path(opt\$celfiles_dir, sample.sheet[[opt\$file_name_col]][1]) -install_cdf(first_cel) +cdf_name <- install_cdf_db(first_cel, annotation = opt\$build_annotation) # Run the main function @@ -213,6 +230,41 @@ sampleNames(eset) <- sample.sheet[[opt\$sample_name_col]] ################################################ ################################################ +if (opt\$build_annotation){ + + # Make some annotation + + dbname <- paste0(cdf_name, '.db') + library(dbname, character.only = TRUE) + anno <- select( + get(dbname), + keys=keys(get(dbname), keytype="PROBEID"), + columns=c('ENSEMBL', 'ENTREZID', 'SYMBOL', 'GENENAME', 'GENETYPE'), + keytype="PROBEID" + ) + + # Remove duplicates by probe + anno <- do.call( + rbind, + lapply( + split( + anno, + anno\$PROBEID + ), + function(x) apply(x, 2, function(y) paste(unique(y), collapse=',')) + ) + ) + + write.table( + anno, + file = paste0(cdf_name, '.annotation.tsv'), + col.names = TRUE, + row.names = FALSE, + sep = '\t', + quote = FALSE + ) +} + # R object for 
other processes to use saveRDS(eset, file = 'eset.rds') diff --git a/tests/modules/nf-core/affy/justrma/nextflow.config b/tests/modules/nf-core/affy/justrma/nextflow.config index f388bd2c055f..92e6be7acd40 100644 --- a/tests/modules/nf-core/affy/justrma/nextflow.config +++ b/tests/modules/nf-core/affy/justrma/nextflow.config @@ -3,7 +3,7 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } withName: 'test_affy_justrma:AFFY_JUSTRMA' { - ext.args = '--sample_name_col name' + ext.args = '--sample_name_col name --build_annotation TRUE' } withName: UNTAR { publishDir = [ enabled: false ] diff --git a/tests/modules/nf-core/affy/justrma/test.yml b/tests/modules/nf-core/affy/justrma/test.yml index 31d54e268832..0917f2489240 100644 --- a/tests/modules/nf-core/affy/justrma/test.yml +++ b/tests/modules/nf-core/affy/justrma/test.yml @@ -1,10 +1,12 @@ - name: affy justrma test_affy_justrma command: nextflow run ./tests/modules/nf-core/affy/justrma -entry test_affy_justrma -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/affy/justrma/nextflow.config tags: - - affy - affy/justrma + - affy files: - path: output/affy/eset.rds + - path: output/affy/hgu133a2.annotation.tsv + md5sum: 3da296cb3b092bf839dda54ebb4fd313 - path: output/affy/matrix.tsv md5sum: 462665542e9531f6d8993b745ce78ae4 - path: output/affy/versions.yml