Skip to content

Commit

Permalink
- execute() now stores the nb of events retained at each pre-proces…
Browse files Browse the repository at this point in the history
…sing step,

to speed-up `collectNbOfRetainedEvents()`
- bumped version to 1.3.6
  • Loading branch information
phauchamps committed Feb 23, 2024
1 parent 8a8681b commit 97e31fc
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 53 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: CytoPipeline
Title: Automation and visualization of flow cytometry data analysis pipelines
Version: 1.3.5
Version: 1.3.6
Authors@R:
c(person(given = "Philippe",
family = "Hauchamps",
Expand Down Expand Up @@ -71,6 +71,8 @@ Suggests:
diffviewer,
knitr,
rmarkdown,
BiocStyle
BiocStyle,
reshape2,
dplyr
VignetteBuilder: knitr
Config/testthat/edition: 3
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# CytoPipeline 1.3

## CytoPipeline 1.3.6
- `execute()` now stores the nb of events retained at each pre-processing step,
to speed-up `collectNbOfRetainedEvents()`

## CytoPipeline 1.3.5
- added CITATION file

Expand Down
147 changes: 97 additions & 50 deletions R/CytoPipeline-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,12 @@ execute <- function(x,
as.json.CytoProcessingStep(
x@flowFramesPreProcessingQueue[[s]]
)
# specific for pre-processing step generating flow frames:
# store nb of events at step
nEvents <- 0
if (inherits(res, "flowFrame")) {
nEvents <- flowCore::nrow(res)
}
genericMeta <-
data.frame(list(
rid = names(cacheResourceFile),
Expand All @@ -838,7 +844,8 @@ execute <- function(x,
preprocessingMeta <-
data.frame(list(
rid = names(cacheResourceFile),
fcsfile = basename(file)
fcsfile = basename(file),
nEvents = nEvents
))

BiocFileCache::bfcmeta(bfc, name = "generic", append = TRUE) <-
Expand Down Expand Up @@ -2043,64 +2050,104 @@ collectNbOfRetainedEvents <- function(

if (missing(whichSampleFiles)) {
whichSampleFiles <- CytoPipeline::sampleFiles(pipL)
} else if (is.numeric(whichSampleFiles)) {
sF <- CytoPipeline::sampleFiles(pipL)
if (min(whichSampleFiles) < 1 || max(whichSampleFiles) > length(sF)) {
stop("whichSampleFiles out of bounds")
}
whichSampleFiles <- CytoPipeline::sampleFiles(pipL)[whichSampleFiles]
} else if (is.character(whichSampleFiles)) {
whichSampleFiles <- basename(whichSampleFiles)
} else {
stop("whichSampleFiles should be either character or numeric")
}

nEventPerSampleList <- list()
allStepNames <- c()
for (s in seq_along(whichSampleFiles)) {
message("Collecting nb of events for sample file ",
whichSampleFiles[s],
"...")
objInfos <- CytoPipeline::getCytoPipelineObjectInfos(
pipL,
whichQueue = "pre-processing",
sampleFile = whichSampleFiles[s],
path = path)
objInfos <- objInfos[objInfos[,"ObjectClass"] == "flowFrame",]
cacheDir <- file.path(path, experimentName, ".cache")
bfc <- BiocFileCache::BiocFileCache(cacheDir, ask = FALSE)

metaPrepDF <- BiocFileCache::bfcmeta(bfc, name = "preprocessing")

if ("nEvents" %in% colnames(metaPrepDF)) {
# as of CytoPipeline version 1.3.6,
# nb of events are stored for each step
cacheInfo <- BiocFileCache::bfcinfo(bfc)
metaPrepDF <- metaPrepDF[metaPrepDF$fcsfile %in% whichSampleFiles,]
DFM <- merge(x = metaPrepDF, y = cacheInfo, by = "rid")
#browser()
DFM <-
DFM[order(DFM$fcsfile.x, DFM$stepNb),][
,c("rid","fcsfile.x","stepNb", "stepName", "nEvents.x")]
allStepNames <- unique(DFM$stepName)
nSampleFiles <- length(whichSampleFiles)
nAllSteps <- length(allStepNames)
eventNbs <- matrix(rep(NA, nSampleFiles * nAllSteps),
nrow = nSampleFiles)
rownames(eventNbs) <- as.character(whichSampleFiles)
colnames(eventNbs) <- allStepNames

nEventPerSampleList[[s]] <- lapply(
objInfos[,"ObjectName"],
FUN = function(objName) {
message("Reading object ", objName, "...")
ff <- CytoPipeline::getCytoPipelineFlowFrame(
pipL,
path = path,
whichQueue = "pre-processing",
sampleFile = whichSampleFiles[s],
objectName = objName)
flowCore::nrow(ff)})
for (i in seq_len(nrow(DFM))) {
sampleName <- DFM[i, "fcsfile.x"]
stepName <- DFM[i, "stepName"]
eventNbs[sampleName, stepName] <- DFM[i, "nEvents.x"]
}
}
else {
# version < 1.3.6 => collect nb of events from reading the flowFrames
# from cache

stepNames <- vapply(objInfos[,"ObjectName"],
FUN = function(str){
gsub(x = str,
pattern = "_obj",
replacement = "")
},
FUN.VALUE = character(length = 1))
nEventPerSampleList <- list()
allStepNames <- c()
for (s in seq_along(whichSampleFiles)) {
message("Collecting nb of events for sample file ",
whichSampleFiles[s],
"...")
objInfos <- CytoPipeline::getCytoPipelineObjectInfos(
pipL,
whichQueue = "pre-processing",
sampleFile = whichSampleFiles[s],
path = path)
objInfos <- objInfos[objInfos[,"ObjectClass"] == "flowFrame",]

nEventPerSampleList[[s]] <- lapply(
objInfos[,"ObjectName"],
FUN = function(objName) {
message("Reading object ", objName, "...")
ff <- CytoPipeline::getCytoPipelineFlowFrame(
pipL,
path = path,
whichQueue = "pre-processing",
sampleFile = whichSampleFiles[s],
objectName = objName)
flowCore::nrow(ff)})

stepNames <- vapply(objInfos[,"ObjectName"],
FUN = function(str){
gsub(x = str,
pattern = "_obj",
replacement = "")
},
FUN.VALUE = character(length = 1))

names(nEventPerSampleList[[s]]) <- stepNames

allStepNames <- union(allStepNames, stepNames)
}

names(nEventPerSampleList[[s]]) <- stepNames
nSampleFiles <- length(whichSampleFiles)
nAllSteps <- length(allStepNames)
eventNbs <- matrix(rep(NA, nSampleFiles * nAllSteps),
nrow = nSampleFiles)
rownames(eventNbs) <- as.character(whichSampleFiles)
colnames(eventNbs) <- allStepNames

allStepNames <- union(allStepNames, stepNames)
}

nSampleFiles <- length(whichSampleFiles)
nAllSteps <- length(allStepNames)
eventNbs <- matrix(rep(NA, nSampleFiles * nAllSteps),
nrow = nSampleFiles)
rownames(eventNbs) <- as.character(whichSampleFiles)
colnames(eventNbs) <- allStepNames

for (s in seq_along(whichSampleFiles)) {
stepNames <- names(nEventPerSampleList[[s]])
for (st in seq_along(stepNames)) {
eventNbs[as.character(whichSampleFiles)[s],
stepNames[st]] <-
nEventPerSampleList[[s]][[st]]
for (s in seq_along(whichSampleFiles)) {
stepNames <- names(nEventPerSampleList[[s]])
for (st in seq_along(stepNames)) {
eventNbs[as.character(whichSampleFiles)[s],
stepNames[st]] <-
nEventPerSampleList[[s]][[st]]
}
}
}

as.data.frame(eventNbs)

}
2 changes: 1 addition & 1 deletion tests/testthat/test-CytoPipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,6 @@ test_that("collectNbOfRetainedEvents works", {
experimentName = experimentName,
path = outputDir,
whichSampleFiles = 3
), regexp = "sampleFile out of bounds")
), regexp = "whichSampleFiles out of bounds")

})
99 changes: 99 additions & 0 deletions vignettes/CytoPipeline.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,105 @@ obj <- getCytoPipelineObjectFromCache(pipL_PeacoQC,
show(obj)
```

## Getting and plotting the nb of retained events are each step

Getting the number of retained events at each pre-processing step, and tracking
these changes throughout the pre-processing steps of a pipeline
for different samples is a useful quality control.

This can be implemented using *CytoPipeline* `collectNbOfRetainedEvents()`
function. Examples of using this function in quality control plots are shown
in this section.

```{r getNbEvents}
ret <- CytoPipeline::collectNbOfRetainedEvents(
experimentName = "OMIP021_PeacoQC",
path = workDir
)
ret
```
```{r getRetainedProp}
retainedProp <-
as.data.frame(t(apply(
ret,
MARGIN = 1,
FUN = function(line) {
if (length(line) == 0 || is.na(line[1])) {
as.numeric(rep(NA, length(line)))
} else {
round(line/line[1], 3)
}
}
)))
retainedProp <- retainedProp[-1]
retainedProp
```

```{r getStepRemovedProp}
stepRemovedProp <-
as.data.frame(t(apply(
ret,
MARGIN = 1,
FUN = function(line) {
if (length(line) == 0) {
as.numeric(rep(NA, length(line)))
} else {
round(1-line/dplyr::lag(line), 3)
}
}
)))
stepRemovedProp <- stepRemovedProp[-1]
stepRemovedProp
```

```{r loadAddPackages}
library("reshape2")
library("ggplot2")
```



```{r plotRetainedProp}
myGGPlot <- function(DF, title){
stepNames = colnames(DF)
rowNames = rownames(DF)
DFLongFmt <- reshape(DF,
direction = "long",
v.names = "proportion",
varying = stepNames,
timevar = "step",
time = stepNames,
ids = rowNames)
DFLongFmt$step <- factor(DFLongFmt$step, levels = stepNames)
ggplot(data = DFLongFmt,
mapping = aes(x = step, y = proportion, text = id)) +
geom_point(col = "blue") +
ggtitle(title) +
theme(axis.text.x = element_text(angle = 90))
}
p1 <- myGGPlot(DF = retainedProp,
title = "Retained event proportion at each step")
p1
```


```{r plotStepRemovedProp}
p2 <- myGGPlot(DF = stepRemovedProp,
title = "Event proportion removed by each step")
p2
```



## Interactive visualization

Expand Down

0 comments on commit 97e31fc

Please sign in to comment.