diff --git a/DESCRIPTION b/DESCRIPTION index 06f7ba6..039db16 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: CytoPipeline Title: Automation and visualization of flow cytometry data analysis pipelines -Version: 1.3.5 +Version: 1.3.6 Authors@R: c(person(given = "Philippe", family = "Hauchamps", @@ -71,6 +71,8 @@ Suggests: diffviewer, knitr, rmarkdown, - BiocStyle + BiocStyle, + reshape2, + dplyr VignetteBuilder: knitr Config/testthat/edition: 3 diff --git a/NEWS.md b/NEWS.md index fbee785..b77f7de 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # CytoPipeline 1.3 +## CytoPipeline 1.3.6 +- `execute()` now stores the nb of events retained at each pre-processing step, +to speed-up `collectNbOfRetainedEvents()` + ## CytoPipeline 1.3.5 - added CITATION file diff --git a/R/CytoPipeline-functions.R b/R/CytoPipeline-functions.R index 5ed42e0..919f18e 100644 --- a/R/CytoPipeline-functions.R +++ b/R/CytoPipeline-functions.R @@ -825,6 +825,12 @@ execute <- function(x, as.json.CytoProcessingStep( x@flowFramesPreProcessingQueue[[s]] ) + # specific for pre-processing step generating flow frames: + # store nb of events at step + nEvents <- 0 + if (inherits(res, "flowFrame")) { + nEvents <- flowCore::nrow(res) + } genericMeta <- data.frame(list( rid = names(cacheResourceFile), @@ -838,7 +844,8 @@ execute <- function(x, preprocessingMeta <- data.frame(list( rid = names(cacheResourceFile), - fcsfile = basename(file) + fcsfile = basename(file), + nEvents = nEvents )) BiocFileCache::bfcmeta(bfc, name = "generic", append = TRUE) <- @@ -2043,64 +2050,104 @@ collectNbOfRetainedEvents <- function( if (missing(whichSampleFiles)) { whichSampleFiles <- CytoPipeline::sampleFiles(pipL) + } else if (is.numeric(whichSampleFiles)) { + sF <- CytoPipeline::sampleFiles(pipL) + if (min(whichSampleFiles) < 1 || max(whichSampleFiles) > length(sF)) { + stop("whichSampleFiles out of bounds") + } + whichSampleFiles <- CytoPipeline::sampleFiles(pipL)[whichSampleFiles] } else if (is.character(whichSampleFiles)) { whichSampleFiles <- basename(whichSampleFiles) + } else { + stop("whichSampleFiles should be either character or numeric") } - nEventPerSampleList <- list() - allStepNames <- c() - for (s in seq_along(whichSampleFiles)) { - message("Collecting nb of events for sample file ", - whichSampleFiles[s], - "...") - objInfos <- CytoPipeline::getCytoPipelineObjectInfos( - pipL, - whichQueue = "pre-processing", - sampleFile = whichSampleFiles[s], - path = path) - objInfos <- objInfos[objInfos[,"ObjectClass"] == "flowFrame",] + cacheDir <- file.path(path, experimentName, ".cache") + bfc <- BiocFileCache::BiocFileCache(cacheDir, ask = FALSE) + + metaPrepDF <- BiocFileCache::bfcmeta(bfc, name = "preprocessing") + + if ("nEvents" %in% colnames(metaPrepDF)) { + # as of CytoPipeline version 1.3.6, + # nb of events are stored for each step + cacheInfo <- BiocFileCache::bfcinfo(bfc) + metaPrepDF <- metaPrepDF[metaPrepDF$fcsfile %in% whichSampleFiles,] + DFM <- merge(x = metaPrepDF, y = cacheInfo, by = "rid") + #browser() + DFM <- + DFM[order(DFM$fcsfile.x, DFM$stepNb),][ + ,c("rid","fcsfile.x","stepNb", "stepName", "nEvents.x")] + allStepNames <- unique(DFM$stepName) + nSampleFiles <- length(whichSampleFiles) + nAllSteps <- length(allStepNames) + eventNbs <- matrix(rep(NA, nSampleFiles * nAllSteps), + nrow = nSampleFiles) + rownames(eventNbs) <- as.character(whichSampleFiles) + colnames(eventNbs) <- allStepNames - nEventPerSampleList[[s]] <- lapply( - objInfos[,"ObjectName"], - FUN = function(objName) { - message("Reading object ", objName, "...") - ff <- CytoPipeline::getCytoPipelineFlowFrame( - pipL, - path = path, - whichQueue = "pre-processing", - sampleFile = whichSampleFiles[s], - objectName = objName) - flowCore::nrow(ff)}) + for (i in seq_len(nrow(DFM))) { + sampleName <- DFM[i, "fcsfile.x"] + stepName <- DFM[i, "stepName"] + eventNbs[sampleName, stepName] <- DFM[i, "nEvents.x"] + } + } + else { + # version < 1.3.6 => collect nb of events from reading the flowFrames + # from cache - stepNames <- vapply(objInfos[,"ObjectName"], - FUN = function(str){ - gsub(x = str, - pattern = "_obj", - replacement = "") - }, - FUN.VALUE = character(length = 1)) + nEventPerSampleList <- list() + allStepNames <- c() + for (s in seq_along(whichSampleFiles)) { + message("Collecting nb of events for sample file ", + whichSampleFiles[s], + "...") + objInfos <- CytoPipeline::getCytoPipelineObjectInfos( + pipL, + whichQueue = "pre-processing", + sampleFile = whichSampleFiles[s], + path = path) + objInfos <- objInfos[objInfos[,"ObjectClass"] == "flowFrame",] + + nEventPerSampleList[[s]] <- lapply( + objInfos[,"ObjectName"], + FUN = function(objName) { + message("Reading object ", objName, "...") + ff <- CytoPipeline::getCytoPipelineFlowFrame( + pipL, + path = path, + whichQueue = "pre-processing", + sampleFile = whichSampleFiles[s], + objectName = objName) + flowCore::nrow(ff)}) + + stepNames <- vapply(objInfos[,"ObjectName"], + FUN = function(str){ + gsub(x = str, + pattern = "_obj", + replacement = "") + }, + FUN.VALUE = character(length = 1)) + + names(nEventPerSampleList[[s]]) <- stepNames + + allStepNames <- union(allStepNames, stepNames) + } - names(nEventPerSampleList[[s]]) <- stepNames + nSampleFiles <- length(whichSampleFiles) + nAllSteps <- length(allStepNames) + eventNbs <- matrix(rep(NA, nSampleFiles * nAllSteps), + nrow = nSampleFiles) + rownames(eventNbs) <- as.character(whichSampleFiles) + colnames(eventNbs) <- allStepNames - allStepNames <- union(allStepNames, stepNames) - } - - nSampleFiles <- length(whichSampleFiles) - nAllSteps <- length(allStepNames) - eventNbs <- matrix(rep(NA, nSampleFiles * nAllSteps), - nrow = nSampleFiles) - rownames(eventNbs) <- as.character(whichSampleFiles) - colnames(eventNbs) <- allStepNames - - for (s in seq_along(whichSampleFiles)) { - stepNames <- names(nEventPerSampleList[[s]]) - for (st in seq_along(stepNames)) { - eventNbs[as.character(whichSampleFiles)[s], - stepNames[st]] <- - nEventPerSampleList[[s]][[st]] + for (s in seq_along(whichSampleFiles)) { + stepNames <- names(nEventPerSampleList[[s]]) + for (st in seq_along(stepNames)) { + eventNbs[as.character(whichSampleFiles)[s], + stepNames[st]] <- + nEventPerSampleList[[s]][[st]] + } } } - as.data.frame(eventNbs) - } diff --git a/tests/testthat/test-CytoPipeline.R b/tests/testthat/test-CytoPipeline.R index a275f4c..bea2e09 100644 --- a/tests/testthat/test-CytoPipeline.R +++ b/tests/testthat/test-CytoPipeline.R @@ -1157,6 +1157,6 @@ test_that("collectNbOfRetainedEvents works", { experimentName = experimentName, path = outputDir, whichSampleFiles = 3 - ), regexp = "sampleFile out of bounds") + ), regexp = "whichSampleFiles out of bounds") }) diff --git a/vignettes/CytoPipeline.Rmd b/vignettes/CytoPipeline.Rmd index b827e6a..c6788fe 100644 --- a/vignettes/CytoPipeline.Rmd +++ b/vignettes/CytoPipeline.Rmd @@ -513,6 +513,105 @@ obj <- getCytoPipelineObjectFromCache(pipL_PeacoQC, show(obj) ``` +## Getting and plotting the nb of retained events are each step + +Getting the number of retained events at each pre-processing step, and tracking +these changes throughout the pre-processing steps of a pipeline +for different samples is a useful quality control. + +This can be implemented using *CytoPipeline* `collectNbOfRetainedEvents()` +function. Examples of using this function in quality control plots are shown +in this section. + +```{r getNbEvents} +ret <- CytoPipeline::collectNbOfRetainedEvents( + experimentName = "OMIP021_PeacoQC", + path = workDir +) +ret +``` +```{r getRetainedProp} +retainedProp <- + as.data.frame(t(apply( + ret, + MARGIN = 1, + FUN = function(line) { + if (length(line) == 0 || is.na(line[1])) { + as.numeric(rep(NA, length(line))) + } else { + round(line/line[1], 3) + } + } + ))) + +retainedProp <- retainedProp[-1] + +retainedProp +``` + +```{r getStepRemovedProp} +stepRemovedProp <- + as.data.frame(t(apply( + ret, + MARGIN = 1, + FUN = function(line) { + if (length(line) == 0) { + as.numeric(rep(NA, length(line))) + } else { + round(1-line/dplyr::lag(line), 3) + } + } + ))) + +stepRemovedProp <- stepRemovedProp[-1] + +stepRemovedProp +``` + +```{r loadAddPackages} +library("reshape2") +library("ggplot2") +``` + + + +```{r plotRetainedProp} + +myGGPlot <- function(DF, title){ + stepNames = colnames(DF) + rowNames = rownames(DF) + DFLongFmt <- reshape(DF, + direction = "long", + v.names = "proportion", + varying = stepNames, + timevar = "step", + time = stepNames, + ids = rowNames) + + DFLongFmt$step <- factor(DFLongFmt$step, levels = stepNames) + + + ggplot(data = DFLongFmt, + mapping = aes(x = step, y = proportion, text = id)) + + geom_point(col = "blue") + + ggtitle(title) + + theme(axis.text.x = element_text(angle = 90)) + +} + +p1 <- myGGPlot(DF = retainedProp, + title = "Retained event proportion at each step") +p1 +``` + + +```{r plotStepRemovedProp} +p2 <- myGGPlot(DF = stepRemovedProp, + title = "Event proportion removed by each step") +p2 +``` + + ## Interactive visualization