Skip to content

Commit

Permalink
In pseudobulk detect if smart subset of matrix can be used
Browse files Browse the repository at this point in the history
  • Loading branch information
const-ae committed Jun 12, 2024
1 parent 6ed529e commit 5fb22f7
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
17 changes: 13 additions & 4 deletions R/pseudobulk.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,12 @@ pseudobulk <- function(data, group_by, ...,
if(verbose) message("Aggregating assay '", assay_name, "' using '", aggr_fnc_res$label, "'.")
data_mat <- SummarizedExperiment::assay(data, assay_name)
new_data_mat <- do.call(cbind, lapply(group_split, function(idx){
aggr_fnc(data_mat[,idx,drop=FALSE])
if(aggr_fnc_res$smart_subset){
# This optimization needs sparseMatrixStats >= 1.17.1 to be effective
aggr_fnc(data_mat, cols = idx)
}else{
aggr_fnc(data_mat[,idx,drop=FALSE])
}
}))
rownames(new_data_mat) <- rownames(data)
new_data_mat
Expand All @@ -110,7 +115,11 @@ pseudobulk <- function(data, group_by, ...,
if(is(tdata_mat, "LinearEmbeddingMatrix")){
data_mat <- t(SingleCellExperiment::sampleFactors(tdata_mat))
new_data_mat <- do.call(cbind, lapply(group_split, function(idx){
aggr_fnc(data_mat[,idx,drop=FALSE])
if(aggr_fnc_res$smart_subset){
aggr_fnc(data_mat, cols = idx)
}else{
aggr_fnc(data_mat[,idx,drop=FALSE])
}
}))
SingleCellExperiment::LinearEmbeddingMatrix(t(new_data_mat), SingleCellExperiment::featureLoadings(tdata_mat),
factorData = SingleCellExperiment::factorData(tdata_mat))
Expand Down Expand Up @@ -191,8 +200,8 @@ get_aggregation_function <- function(assay_name, aggregation_functions){
}else{
label <- "custom function"
}

list(fnc = aggr_fnc, label = label)
smart_subset <- "cols" %in% names(formals(aggr_fnc))
list(fnc = aggr_fnc, label = label, smart_subset = smart_subset)
}

#' Quote grouping variables
Expand Down
11 changes: 11 additions & 0 deletions tests/testthat/test-pseudobulk.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,14 @@ test_that("NA's don't mess up the results", {
psce <- pseudobulk(sce, group_by = vars(fav_food), make_colnames = FALSE)
expect_equal(SummarizedExperiment::colData(psce)$fav_food, unique(SummarizedExperiment::colData(sce)$fav_food))
})


# Compare the speed of a complex aggregation with and without smart subsetting
# mat <- as(matrix(rpois(n = 1000 * 1e5, lambda = 0.1), nrow = 1000, ncol = 1e5), "dgCMatrix")
# sce <- SingleCellExperiment::SingleCellExperiment(list(counts = mat))
# grouping <- sample(1:3000, size = 1e5, replace = TRUE)
# bench::mark(
# smart = pseudobulk(sce, group_by = vars(grouping), aggregation_functions = list("counts" = sparseMatrixStats::rowMeans2)),
# not_smart = pseudobulk(sce, group_by = vars(grouping), aggregation_functions = list("counts" = Matrix::rowMeans))
# )

0 comments on commit 5fb22f7

Please sign in to comment.