Skip to content

Commit

Permalink
derive annotation from .db packages for affy (nf-core#2825)
Browse files Browse the repository at this point in the history
* derive annotation from .db packages for affy

* Appease eclint

* Minor fixes

---------

Co-authored-by: Maxime U. Garcia <[email protected]>
  • Loading branch information
2 people authored and samfulcrum committed Feb 27, 2023
1 parent 88810e1 commit e460148
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 12 deletions.
7 changes: 4 additions & 3 deletions modules/nf-core/affy/justrma/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ process AFFY_JUSTRMA {
tuple val(meta2), path(description)

output:
tuple val(meta), path("*.rds"), emit: rds
tuple val(meta), path("*.tsv"), emit: tsv
path "versions.yml" , emit: versions
tuple val(meta), path("*.rds") , emit: rds
tuple val(meta), path("matrix.tsv") , emit: expression
tuple val(meta), path("*.annotation.tsv") , emit: annotation, optional: true
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
10 changes: 7 additions & 3 deletions modules/nf-core/affy/justrma/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,15 @@ output:
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- tsv:
- expression:
type: file
description: TSV-format intensity matrix
pattern: "*.tsv"
- rdata:
pattern: "matrix.tsv"
- annotation:
type: file
description: TSV-format annotation table
pattern: "*.annotation.tsv"
- rds:
type: file
description: Serialised ExpressionSet object
pattern: "*.rds"
Expand Down
60 changes: 56 additions & 4 deletions modules/nf-core/affy/justrma/templates/affy_justrma.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,31 @@ read_delim_flexible <- function(file, header = TRUE, row.names = NULL){
#' Install the right CDF for a given cel file
#'
#' @param celfile A valid path to a CEL file
#' @param annotation Boolean indicating whether to install the annotation
#' package
#'
#' @return The cleaned CDF name (without the 'cdf' suffix) from which the package names are derived.

install_cdf <- function(celfile){
install_cdf_db <- function(celfile, annotation = FALSE){
# Install the CDF package (and, when `annotation` is TRUE, the matching
# '.db' annotation package) for the chip named in the header of `celfile`.
# Returns the cleaned CDF name so callers can rebuild the package names.
library(affyio)
# The first element of the CEL header is used as the chip's CDF name
headdetails <- read.celfile.header(celfile)
ref.cdfName <- headdetails[[1]]
# addcdf = FALSE: the suffix is appended per package in the loop below
cleaned.cdfName <- cleancdfname(ref.cdfName, addcdf = FALSE)
# NOTE(review): the loop below also installs '<name>cdf', so this call looks
# redundant -- possibly a leftover of the previous implementation; confirm.
cdfFromBioC(paste0(cleaned.cdfName, 'cdf'))

# Package-name suffixes to install: always the CDF, optionally the '.db'
exts = 'cdf'
if (annotation){
exts <- c(exts, '.db')
}

# Install each package into the local 'libs' directory from the repositories
# reported by BiocManager, pulling in only Depends and Imports.
# NOTE(review): install.packages() presumably reports a failed install as a
# warning rather than an error, so a missing package would only surface when
# it is loaded later -- verify whether an explicit check is wanted here.
for (package in paste0(cleaned.cdfName, exts)){
install.packages(
package,
lib = 'libs',
repos = BiocManager::repositories(),
dependencies = c("Depends", "Imports")
)
}
# Return value: the cleaned CDF name without any suffix
cleaned.cdfName
}

#' Round numeric dataframe columns to fixed decimal places by applying
Expand Down Expand Up @@ -116,7 +132,8 @@ opt <- list(
cdfname = NULL,
rm.mask = FALSE,
rm.outliers = FALSE,
rm.extra = FALSE
rm.extra = FALSE,
build_annotation = FALSE
)
if (opt\$description == ''){
opt\$description = NULL
Expand Down Expand Up @@ -183,7 +200,7 @@ if (! opt\$file_name_col %in% colnames(sample.sheet)){
dir.create('libs')
.libPaths('libs')
first_cel <- file.path(opt\$celfiles_dir, sample.sheet[[opt\$file_name_col]][1])
install_cdf(first_cel)
cdf_name <- install_cdf_db(first_cel, annotation = opt\$build_annotation)

# Run the main function

Expand Down Expand Up @@ -213,6 +230,41 @@ sampleNames(eset) <- sample.sheet[[opt\$sample_name_col]]
################################################
################################################

if (opt\$build_annotation){

    # Build a probe-level annotation table from the platform's '.db' package
    # (named '<cdf_name>.db') and write it out as '<cdf_name>.annotation.tsv'.

    dbname <- paste0(cdf_name, '.db')
    library(dbname, character.only = TRUE)

    # Look the annotation object up once instead of once per use
    annotation_db <- get(dbname)
    anno <- select(
        annotation_db,
        keys = keys(annotation_db, keytype = "PROBEID"),
        columns = c('ENSEMBL', 'ENTREZID', 'SYMBOL', 'GENENAME', 'GENETYPE'),
        keytype = "PROBEID"
    )

    # Remove duplicates by probe: select() can return several rows for one
    # probe, so collapse to a single row per probe by joining the unique values
    # of every column with commas. Columns are iterated directly with vapply()
    # rather than apply(), which would first coerce each per-probe data frame
    # to a matrix.
    collapse_unique <- function(values) paste(unique(values), collapse = ',')
    anno <- do.call(
        rbind,
        lapply(
            split(anno, anno\$PROBEID),
            function(probe_rows) {
                vapply(probe_rows, collapse_unique, character(1))
            }
        )
    )

    # Tab-separated, unquoted, with a header row and no row names
    write.table(
        anno,
        file = paste0(cdf_name, '.annotation.tsv'),
        col.names = TRUE,
        row.names = FALSE,
        sep = '\t',
        quote = FALSE
    )
}

# R object for other processes to use

saveRDS(eset, file = 'eset.rds')
Expand Down
2 changes: 1 addition & 1 deletion tests/modules/nf-core/affy/justrma/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

withName: 'test_affy_justrma:AFFY_JUSTRMA' {
ext.args = '--sample_name_col name'
ext.args = '--sample_name_col name --build_annotation TRUE'
}
withName: UNTAR {
publishDir = [ enabled: false ]
Expand Down
4 changes: 3 additions & 1 deletion tests/modules/nf-core/affy/justrma/test.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
- name: affy justrma test_affy_justrma
command: nextflow run ./tests/modules/nf-core/affy/justrma -entry test_affy_justrma -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/affy/justrma/nextflow.config
tags:
- affy
- affy/justrma
- affy
files:
- path: output/affy/eset.rds
- path: output/affy/hgu133a2.annotation.tsv
md5sum: 3da296cb3b092bf839dda54ebb4fd313
- path: output/affy/matrix.tsv
md5sum: 462665542e9531f6d8993b745ce78ae4
- path: output/affy/versions.yml

0 comments on commit e460148

Please sign in to comment.