diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..91114bf --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,2 @@ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f160a66 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,31 @@ +name: CI + +on: + push: + branches: + - '**' + paths-ignore: + - '*.md' + - '*.MD' + - '*.ignore' + - LICENSE + +jobs: + + SigGenes: + + runs-on: ubuntu-latest + + steps: + + - uses: actions/checkout@v4 + + # Run everything via the Bioconductor docker image as it has almost all dependencies preinstalled + - name: devtools-check-docker + run: | + bioc_install='BiocManager::install(c("edgeR", "limma", "Matrix", "S4Vectors", "scuttle", "SummarizedExperiment", "SingleCellExperiment"))' + dev_check='devtools::check("/SigGenes/")' + dev_doc='devtools::document("/SigGenes/")' + testthat='testthat::test_file("/SigGenes/tests/testthat/all_tests.R")' + docker run -v "$(pwd)":"/SigGenes/" bioconductor/bioconductor_docker:RELEASE_3_18 Rscript --vanilla -e "${bioc_install}; ${dev_check}; ${dev_doc}; ${testthat}" + docker run -v "$(pwd)":"/SigGenes/" bioconductor/bioconductor_docker:RELEASE_3_19 Rscript --vanilla -e "${bioc_install}; ${dev_check}; ${dev_doc}; ${testthat}" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9a85036 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +..R* +gitHeadInfo.gin +R/.Rapp.history +*.DS_* +.Rproj.user +.Rhistory +~* +*.tar.gz +.Rproj.user diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..cfb60b6 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,24 @@ +Package: SigGenes +Title: Automated differential analysis and signature generation for bulk and single-cell data +Description: Automated differential analysis between groups based on limma-voom and generation of per-group signatures via custom filtering. +Version: 1.0.0 +Author: Alexander Bender [aut,cre] +Maintainer: Alexander Bender +License: LGPL (>=2) +Encoding: UTF-8 +Depends: + R (>= 4.0), +Imports: + edgeR, + limma, + Matrix, + methods, + S4Vectors, + scuttle, + SingleCellExperiment, + SummarizedExperiment +Suggests: + testthat +URL: https://github.com/atpoint/SigGenes +BugReports: https://github.com/atpoint/SigGenes/issues +RoxygenNote: 7.2.3 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..0e88592 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,28 @@ +# Generated by roxygen2: do not edit by hand + +export(create_signatures) +export(de_limma) +export(get_pexpr) +importFrom(Matrix,t) +importFrom(S4Vectors,SimpleList) +importFrom(SingleCellExperiment,"sizeFactors<-") +importFrom(SingleCellExperiment,colData) +importFrom(SingleCellExperiment,sizeFactors) +importFrom(SummarizedExperiment,assay) +importFrom(SummarizedExperiment,assayNames) +importFrom(SummarizedExperiment,colData) +importFrom(edgeR,DGEList) +importFrom(edgeR,calcNormFactors) +importFrom(edgeR,filterByExpr) +importFrom(edgeR,voomLmFit) +importFrom(limma,contrasts.fit) +importFrom(limma,is.fullrank) +importFrom(limma,makeContrasts) +importFrom(limma,topTreat) +importFrom(limma,treat) +importFrom(methods,as) +importFrom(methods,is) +importFrom(scuttle,aggregateAcrossCells) +importFrom(scuttle,librarySizeFactors) +importFrom(stats,model.matrix) +importFrom(utils,combn) diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..8f10d51 --- /dev/null +++ b/NEWS @@ -0,0 +1,4 @@ +CHANGES IN VERSION 1.0.0 +------------------------ + + o Initial commit diff --git a/R/de_testing.R b/R/de_testing.R new file mode 100644 index 0000000..06f4c0a --- /dev/null +++ b/R/de_testing.R @@ -0,0 +1,298 @@ +#' Differential expression analysis on single-cell or pseudobulk level using limma-voom. +#' +#' Function accepts a SingleCellExperiment and expects an assay "counts" representing raw counts. +#' It then performs DE analysis forming all possible contrasts with limma-voom \(Law et al. 2014\), using the \code{voomLmFit} function from the `edgeR` package. +#' It is similar to running \code{voom} followed by \(optionally\) \code{duplicateCorrelation} and \code{lmFit}, for details see the details section in \code{edgeR::voomLmFit}. +#' DE testing is done by either contrasting all groupwise combinations, or by testing each group level vs the average of all other group levels. +#' The function can optionally aggregate single-cell data into pseudobulks. The function expects one group +#' to be the main covariate for testing and allows an arbitrary number of other covariates to be adjusted for. Alternatively, the user can specify a blocking +#' factor, for example to account for repeated measures. +#' +#' The output is a list with DE stats per contrast. All possible contrasts are displayed, for example for groups A/B/C it's A-B, A-C, B-C, B-A, C-A, C-B. The +#' individual results for identical contrasts e.g. A-B / B-A can differ if \code{min_pct} is above 0, as each list is filtered that the non-reference expresses +#' the gene with more than \code{min_pct} of cells. If {min_pct} is 0 then these two lists would be the same. +#' +#' +#' @param x A SingleCellExperiment +#' @param use_assay Name of the assay representing the raw counts +#' @param aggregate_by Vector with names of the colData columns to aggregate by. If NULL then +#' no aggregation is done prior to DE analysis. +#' @param use_existing_sf Logical, whether to use the size factors from \code{sizeFactors(x)} for normalization. +#' Only applies if no pseudobulk aggregation is performed. If FALSE and no pseudobulk aggregation is performed then +#' normalization is done via the \code{calcNormFactors} function from `edgeR`. +#' The default is TRUE as one usually does DE analysis on single-cell level after preprocessing and cluster, so normalization +#' usually has already been performed. If TRUE and no size factors can be found will run library size normalization with the +#' \code{librarySizeFactors} function from `scuttle`. +#' @param main_covariate Name of the main covariate for the testing. +#' @param other_covariates Vector with other covariate names to adjust for in DE analysis. +#' @param block Name of a factor to block for. +#' @param min_pct The minimum percentage of cells in at least one of the two group levels per contrast expressing the gene (expressed means counts above 0). +#' @param min_fc Minimum fold change to test against via \code{limma::treat()}. +#' @param use_weights Logical, whether to use empirical sample weights in \code{edgeR::voomLmFit} +#' @param mode Either "pairwise" to run all pairwise comparisons between `main_covariate` levels, +#' or "average" to test each level against the average of all other levels. +#' @param use_filterByExpr Logical, whether to run \code{filterByExpr} from `edgeR` for prefiltering of counts. +#' This makes sense when testing for example bulk RNA-seq data. +#' @param return_object Logical, whether to return the DGEList in the output +#' @param delim A delimiter string used in the names of the output list. +#' +#' @examples +#' set.seed(1) +#' sce <- scuttle::mockSCE(ncells=1000, ngenes=1000, nspikes=0) +#' sce$group <- rep(LETTERS[1:4], each=250) +#' sce$donor <- rep(LETTERS[1:4], 250) +#' res_pairwise <- de_limma(x = sce, aggregate_by = c("group", "donor"), +#' main_covariate = "group", other_covariates = c("donor")) +#' res_pairwise_block <- de_limma(x = sce, aggregate_by = c("group", "donor"), +#' main_covariate = "group", block = "donor") +#' res_average <- de_limma(x = sce, aggregate_by = c("group", "donor"), +#' main_covariate = "group", other_covariates = c("donor"), +#' mode="average") +#' +#' @author Alexander Bender +#' +#' @references +#' Law, CW, Chen, Y, Shi, W, Smyth, GK (2014). +#' Voom: precision weights unlock linear model analysis tools for RNA-seq read counts. Genome Biology 15, R29. +#' \url{doi:10.1186/gb-2014-15-2-r29} +#' +#' @seealso \code{\link{voom}} +#' +#' @importFrom SingleCellExperiment colData sizeFactors sizeFactors<- +#' @importFrom SummarizedExperiment assay assayNames colData +#' @importFrom scuttle aggregateAcrossCells librarySizeFactors +#' @importFrom edgeR calcNormFactors DGEList filterByExpr voomLmFit +#' @importFrom stats model.matrix +#' @importFrom limma contrasts.fit is.fullrank makeContrasts treat topTreat +#' @importFrom utils combn +#' @importFrom S4Vectors SimpleList +#' @importFrom methods as is +#' +#' @export +de_limma <- function(x, use_assay = "counts", aggregate_by=NULL, use_existing_sf = TRUE, + main_covariate, other_covariates=NULL, block=NULL, + min_pct=0, min_fc = 1.0, use_weights = FALSE, + mode=c("pairwise", "average"), + use_filterByExpr = FALSE, return_object = FALSE, delim="_vs_"){ + + # Checks + is_sce <- is(x, "SingleCellExperiment") + if(!is_sce) stop("x must be a SingleCellExperiment") + + if(!use_assay %in% assayNames(x)) + stop(paste("x contains no assay named", use_assay)) + + if(!is.null(aggregate_by)){ + is_agg <- aggregate_by %in% colnames(colData(x)) + if(!sum(is_agg) == length(aggregate_by)) stop("Not all entries in are in colData(x)") + } + + if(!is.null(block)){ + if(!block %in% colnames(colData(x))) + stop("Name of block not in colData(x)") + } + + if(!is(use_existing_sf, "logical")) + stop("use_existing_sf must be logical") + + is_main <- main_covariate %in% colnames(colData(x)) + if(!is_main) stop("Not all entries in are in colData(x)") + + if(!is.null(other_covariates)){ + is_other <- other_covariates %in% colnames(colData(x)) + if(!is_other) stop("Not all entries in are in colData(x)") + } + + is_pct <- min_pct >= 0 & is.numeric(min_pct) + if(!is_pct) stop("min_pct must be numeric and not negative") + + is_fc <- min_fc >= 1.0 & is.numeric(min_fc) + if(!is_fc) stop("is_fc must be numeric and >= 1.0") + + if(!is.logical(use_weights)) stop("use_weights must be logical") + + invisible(match.arg(mode, c("pairwise", "average"))) + mode <- match.arg(mode) + + if(!is(delim, "character")) stop("delim must be a charcter") + + # Aggregate to pseudobulk + if(!is.null(aggregate_by)){ + + pb <- aggregateAcrossCells(x, id=colData(x)[,aggregate_by], use.assay.type="counts") + pb <- DGEList(counts = assay(pb), samples = data.frame(colData(pb), check.names = FALSE)) + run_norm <- TRUE + + } else { + + w <- which(assayNames(x) == "counts") + pb <- DGEList(counts = assay(x, use_assay), samples = data.frame(colData(x), check.names = FALSE)) + sf <- suppressWarnings(sizeFactors(x)) + # scran::convertTo() + if (!is.null(sf)) { + nf <- log(sf/y$samples$lib.size) + nf <- exp(nf - mean(nf)) + y$samples$norm.factors <- nf + } + + if(use_existing_sf){ + + if(is.null(sizeFactors(x))){ + + sf <- librarySizeFactors(x) + sizeFactors(x) <- sf + run_norm <- FALSE + + } + + run_norm <- FALSE + + } else { + + run_norm <- TRUE + + } + + } + + pb$samples <- droplevels.data.frame(pb$samples) + pb$genes <- NULL + + # Require replication in at least one group + max_n <- max(table(pb$samples[[main_covariate]])) + if(max_n == 1) stop("No replication in any group -- DE analysis not possible!") + + # Design, allowing any additional covariates + others <- if(!is.null(other_covariates)) paste0(" + ", other_covariates) else "" + f <- as.formula(paste0("~ 0 + ", main_covariate, others)) + design <- model.matrix(f, pb$samples) + colnames(design) <- gsub(paste0("^", main_covariate), "", colnames(design)) + + is_fr <- is.fullrank(design) + if(!is_fr) stop("Design is not full-rank, meaning you cannot adjust for these covariates!") + + # Optionally prefilter with edgeR + if(use_filterByExpr){ + + keep_fbe <- filterByExpr(pb, group = pb$samples[[main_covariate]]) + pb <- pb[keep_fbe,] + + + } + + # Prefilter to genes that in at least one group is expressed by, then normalize + pexp <- get_pexpr(assay(x, use_assay), group = x[[main_covariate]]) + + if(min_pct > 0){ + + keep <- apply(pexp >= min_pct, 1, sum) > 0 + pb <- pb[keep,] + + } + + rm(x) + + if(nrow(pb) == 0) return(NULL) + if(nrow(pb) < 10){ + warning("Fewer than 10 genes (arbitrary threshold to trigger this warning) left after expression filter.") + } + + # Only run normalization if doing pseudobulk analysis + if(run_norm){ + pb <- calcNormFactors(pb, method = "TMMwsp") + } + + # For now we do voomLmFit + if(is.null(block)){ + blocker <- NULL + } else { + blocker <- pb$samples[[block]] + } + + v <- voomLmFit(counts = pb, design = design, sample.weights = use_weights, block = blocker) + + # Make all pairwise comparisons and make the contrasts.fit + ux <- pb$samples[[main_covariate]] + if(is(ux, "factor")){ + u <- levels(droplevels(ux)) + } else { + u <- unique(as.character(ux)) + } + + # -- Pairwise -- # ----------------------------------------------------------- + if(mode=="pairwise"){ + + contrasts <- .make_contrasts_pairwise(u, design, delim) + v <- contrasts.fit(fit = v, contrasts = contrasts) + v <- treat(v, fc=min_fc) + + # Extract results per contrast + iter <- colnames(contrasts) + de_results <- lapply(iter, function(i){ + + s <- strsplit(i, delim)[[1]] + first <- s[1] + second <- s[2] + + tt <- topTreat(fit = v, coef = i, number=Inf) + tt <- tt[order(tt$t, decreasing = TRUE),] + pe <- pexp[rownames(tt), c(first, second)] + colnames(pe) <- c("pct.1", "pct.2") + tt <- cbind(tt, pe) + + tt <- tt[tt$pct.1 >= min_pct | tt$pct.2 >= min_pct,] + tt$adj.P.Val <- p.adjust(tt$P.Value, "BH") + tt + + }) + + names(de_results) <- iter + + } + + if(mode=="average"){ + + contrasts <- .make_contrasts_average(u, design) + v <- contrasts.fit(fit = v, contrasts = contrasts) + v <- treat(v, fc=min_fc) + + # Extract significant genes per contrast + iter <- colnames(contrasts) + de_results <- lapply(iter, function(i){ + + tt <- topTreat(fit = v, coef = i, number=Inf) + tt <- tt[order(tt$t, decreasing = TRUE),] + pct.1 <- round(as.numeric(pexp[rownames(tt), i, drop=TRUE]), 2) + pct.2 <- round(as.numeric(apply(pexp[rownames(tt), setdiff(colnames(pexp), i), drop=FALSE], 1, mean)), 2) + tt$pct.1 <- pct.1 + tt$pct.2 <- pct.2 + + tt <- tt[tt$pct.1 >= min_pct,] + tt$adj.P.Val <- p.adjust(tt$P.Value, "BH") + tt + + }) + + names(de_results) <- iter + + } + + de_results <- sapply(de_results, function(x) data.frame(Gene = rownames(x), x), simplify = FALSE) + de_results <- as(de_results, "SimpleList") + + to_return <- SimpleList(results = de_results) + + if(return_object){ + + to_return[["DGEList"]] <- pb + + } + + to_return[["params"]] <- list() + to_return[["params"]]$mode <- mode + to_return[["params"]]$delim <- delim + to_return[["params"]]$min_pct <- min_pct + + return(to_return) + +} diff --git a/R/signatures.R b/R/signatures.R new file mode 100644 index 0000000..8894ce9 --- /dev/null +++ b/R/signatures.R @@ -0,0 +1,258 @@ +#' Internal function to filter and rank DEG results +#' +#' @author Alexander Bender +#' +#' @param res The output of \code{de_limma} run with \code{mode="pairwise"} +#' +#' @inheritParams create_signatures +#' +#' @importFrom methods as is +#' +#' @keywords internal +#' +rank_degs <- function(res, delim, + signif_column, signif_threshold, + effect_column, effect_threshold, + rank_column, rank_method, + gene_column, min_pct){ + + if(!class(res) %in% c("list", "SimpleList") | is.null(names(res))){ + stop("res must be a named list", call.=FALSE) + } + + if(!all(grepl(delim, names(res)))) stop("delim was not found in all names of the res") + + invisible(match.arg(arg=class(signif_threshold), choices="numeric")) + invisible(match.arg(arg=class(effect_threshold), choices="numeric")) + + check.gf <- sum(unlist(lapply(res, function(x) + if(!gene_column %in% colnames(x)) return(1) else return(0)))) + check.sf <- sum(unlist(lapply(res, function(x) + if(!signif_column %in% colnames(x)) return(1) else return(0)))) + check.ef <- sum(unlist(lapply(res, function(x) + if(!effect_column %in% colnames(x)) return(1) else return(0)))) + check.rk <- sum(unlist(lapply(res, function(x) + if(!rank_column %in% colnames(x)) return(1) else return(0)))) + + if(check.gf>0) stop("gene_column does not exist in all entries of res.list!") + if(check.sf>0) stop("signif_column does not exist in all entries of res.list!") + if(check.ef>0) stop("effect_column does not exist in all entries of res.list!") + if(check.rk>0) stop("rank_column does not exist in all entries of res.list!") + + rank_method <- match.arg(rank_method, c("decreasing", "increasing")) + + # Ranking + nm <- data.frame(t(as.data.frame(strsplit(names(res), delim), check.names = FALSE)), check.names = FALSE) + colnames(nm) <- c("first", "second") + rownames(nm) <- NULL + nm2 <- nm[,2:1] + nm$direction <- "forward" + nm2$direction <- "reverse" + colnames(nm2) <- colnames(nm) + nm <- rbind(nm, nm2) + + u <- unique(c(nm$first, nm$second)) + + l <- sapply(u, function(f){ + + y <- nm[nm$first %in% f,] + + y2 <- lapply(1:nrow(y), function(z){ + + yy <- y[z,,drop=FALSE] + + if(yy$direction=="forward"){ + + current_contrast <- paste0(yy$first, delim, yy$second) + + here <- res[[current_contrast]] + here <- here[here$pct.1 >= min_pct,] + + } else { + + current_contrast <- paste0(yy$second, delim, yy$first) + here <- res[[current_contrast]] + here[[effect_column]] <- here[[effect_column]] * -1 + here <- here[here$pct.2 >= min_pct,] + + } + + h <- here[here[[signif_column]] < signif_threshold & here[[effect_column]] > effect_threshold,] + k <- if(rank_method=="decreasing") TRUE else FALSE + o <- order(h[[rank_column]], decreasing = k) + h[o,][[gene_column]] + + }) + + names(y2) <- y$second + return(y2) + + + }, simplify = FALSE) + + l <- as(l, "SimpleList") + return(l) + +} + +#' Convert ranks to signatures +#' +#' @param ranked The output of \code{rank_degs} +#' @author Alexander Bender +#' @keywords internal +#' @inheritParams create_signatures +#' +ranks2signatures <- function(ranked, min_prop, n, exclude_groups){ + + if(!is.null(exclude_groups)){ + if(!sum(exclude_groups %in% names(ranked))==length(exclude_groups)) + stop("Specified exclude_groups does not exist as a group name") + } + + if(is.null(names(ranked))) stop("ranked has no names") + if(min_prop > 1 | min_prop <= 0) stop("min_prop must be between >= 0 and <= 1") + if(is.finite(n)){ + if(n <= 0 | (n %% 1 > 0)) stop("n must be an even number larger 0 or Inf") + } + + # optionally remove exclude_groups from ranked + sdf <- setdiff(names(ranked), exclude_groups) + ranked <- ranked[sdf] + ranked <- sapply(names(ranked), function(x){ + + r <- ranked[[x]] + k <- intersect(names(r), sdf) + r[k] + + }, simplify = FALSE) + + nms <- names(ranked) + s <- sapply(nms, function(x){ + + genes_ranked <- ranked[[x]] + genes_ranked[[x]] <- NULL + + # rank matrix, low values means high ranks and viceversa, 1 means not present in that pairwise list: + rmat <- .rankmatrix(genes_ranked) + + # Get the genes that qualify as markers given the number of groups and the min_prop + isna <- !is.na(rmat) + from <- ncol(rmat) + to <- floor(min_prop*from) + to <- if(to==0) 1 else to + + med <- function(y) median(y,na.rm=TRUE) + + # take the rankmatrix (so each entry is the rank of the gene in the individual ranking), + # and then calculate the median of the ranks -- that is the final ranking metric + final <- lapply(from:to, function(n){ + + idx <- base::rowSums(isna)==n + if(sum(idx)==0) return(NULL) + + rmat_x <- rmat[idx,,drop=FALSE] + + # get the median of the ranks + mat <- apply(rmat_x, 1, med) + + # rank by the median + o <- order(mat) + + markers.df <- data.frame(rmat_x[o,,drop=FALSE], check.names=FALSE) + + return(markers.df) + + }) + + final <- do.call(rbind, final) + + final[!is.na(final)] <- 1 + final[is.na(final)] <- 0 + final <- head(final, n=n) + if(length(final) == 0){ + final <- NULL + } + + return(final) + + },simplify = FALSE) + + # Now, based on min_prop filter for groupwise signature genes + sigs <- lapply(names(s), function(x){ + + sx <- s[[x]] + if(is.null(sx)) return(NULL) + + min_cols <- floor(min_prop * ncol(sx)) + + keep <- apply(sx == 1, 1, sum) >= min_cols + out <- rownames(sx[keep,,drop=FALSE]) + out + + }) + + names(sigs) <- names(s) + + return(sigs) + +} + +#' Create group-specific gene signatures based on pairwise differential analysis results +#' +#' @param x List with DE results from \code{de_limma} run with \code{mode="pairwise"} +#' @param n The maximum number of top signature genes per group to return +#' @param min_prop A number greater zero and smaller or equal than 1. Defines the stringency of the signature. It is the proportion +#' of groups that a gene needs to be overexpressed to. A value of 1 means a gene must be overexpressed versus all other groups. A value +#' of 0.75 means overexpressed versus 75\% of other groups. +#' @param exclude_groups Vector with group names to exclude from signature creation. Can be helpful if a group is expected to be transitional between two other groups +#' so one would not expect unique genes defining this group. Can help returning more genes for the remaining groups. +#' @param signif_column Name of the significance column in \code{x}. +#' @param signif_threshold Threshold for significance. +#' @param effect_column Name of the effect column in \code{x}. +#' @param effect_threshold Threshold for effect size \(below this is "underexpressed", above is "overexpressed"\) +#' @param gene_column Name of the gene column in \code{x}. +#' @param rank_column Name of the column in \code{x} to use for ranking. +#' @param rank_method Either "decreasing" or "increasing" ranking direction +#' +#' @author Alexander Bender +#' +#' @examples +#' set.seed(1) +#' sce <- scuttle::mockSCE(ncells=1000, ngenes=1000, nspikes=0) +#' sce$group <- rep(LETTERS[1:4], each=250) +#' sce$donor <- rep(LETTERS[1:4], 250) +#' res_pairwise <- de_limma(x = sce, aggregate_by = c("group", "donor"), +#' main_covariate = "group", other_covariates = c("donor")) +#' create_signatures <- create_signatures(x = res_pairwise, signif_threshold = .9) +#' create_signatures +#' +#' @export +create_signatures <- function(x, n=Inf, min_prop=1, exclude_groups=NULL, + signif_column="adj.P.Val", signif_threshold=0.05, + effect_column="logFC", effect_threshold=0, + rank_column="P.Value", rank_method="increasing", + gene_column="Gene"){ + + mode_found <- x$params$mode + delim_found <- x$params$delim + pct_found <- x$params$min_pct + results_found <- x$results + + if(is.null(x$params)){ + stop("This does not look like the (unmodified) output of de_limma!") + } + + if(is.null(mode_found) | is.null(delim_found) | is.null(pct_found) | is.null(results_found)){ + stop("This does not look like the (unmodified) output of de_limma!") + } + + ranked <- rank_degs(res=x$results, delim=delim_found, signif_column=signif_column, signif_threshold=signif_threshold, + effect_column=effect_column, effect_threshold=effect_threshold, + rank_column=rank_column, rank_method=rank_method, + gene_column=gene_column, min_pct=x$params$min_pct) + + signatures <- ranks2signatures(ranked=ranked, n=n, min_prop=min_prop, exclude_groups=exclude_groups) + + return(signatures) + +} diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..7e14696 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,122 @@ +#' Calculate percentage of expression per group for each gene. +#' +#' +#' @param data Numeric data.frame, matrix or similar with rows being genes/observations and columns being cells/samples. +#' @param group Vector of length \code{ncol(data)} with group information +#' @param threshold Consider counts above this threshold as "expressed" +#' @param digits Round percentage to this digit +#' +#' @examples +#' ngenes <- 10 +#' nsamples <- 1000 +#' set.seed(1) +#' data <- matrix(rnbinom(ngenes*nsamples, mu=0.5, nsamples), ncol=nsamples, byrow=TRUE) +#' rownames(data) <- paste0("gene", 1:nrow(data)) +#' group <- rep(LETTERS[1:10], each=nsamples / 10) +#' get_pexpr(data, group) +#' +#' # Also works on Csparse matrix +#' data_sparse <- as(data, "CsparseMatrix") +#' get_pexpr(data_sparse, group) +#' +#' @author Alexander Bender +#' +#' @importFrom Matrix t +#' +#' @export +get_pexpr <- function(data, group, threshold=0, digits=2){ + + if(ncol(data)!=length(group)) stop("ncol(data) != length(group)") + if(!is.numeric(threshold) | threshold < 0) stop("threshold must be numeric and > 0") + + datar <- (data>threshold) * 1 + a <- rowsum(x=t(datar), group=group) + b <- as.numeric(table(group)[rownames(a)]) + f <- round(100*t(apply(a, 2, function(x) x/b)), digits=digits) + f + +} + +#' Makes all pairwise contrasts based on an intercept-less design +#' @importFrom utils combn +#' @importFrom limma makeContrasts +#' @keywords internal +.make_contrasts_pairwise <- function(u, design, delim){ + + all_combinations <- combn(u, 2) + + all_contrasts <- lapply(1:ncol(all_combinations), function(cc){ + + one <- all_combinations[1,cc,drop=TRUE] + two <- all_combinations[2,cc,drop=TRUE] + + p_raw <- paste0(one, "-", two) + p_valid <- paste0(make.names(one), "-", make.names(two)) + col_valid <- make.names(colnames(design)) + + co <- suppressWarnings(makeContrasts(contrasts = p_valid, levels=col_valid)) + rownames(co) <- colnames(design) + colnames(co) <- paste0(one, delim, two) + co + + }) + + contrasts <- do.call(cbind, all_contrasts) + colnames(contrasts) <- gsub(" - ", delim, colnames(contrasts)) + + return(contrasts) + +} + +#' Makes all averaging contrasts based on an intercept-less design +#' @importFrom limma makeContrasts +#' @keywords internal +.make_contrasts_average <- function(u, design){ + + all_contrasts <- lapply(u, function(cc){ + + # potentially invalid + one <- cc + two <- setdiff(u, cc) + two2 <- paste0("(", paste(two, collapse="+"), ") / ", length(two)) + + # valid + onev <- make.names(one) + twov <- make.names(two) + two2v <- paste0("(", paste(twov, collapse="+"), ") / ", length(twov)) + cnv <- make.names(colnames(design)) + cov <-paste0(onev, " - ", two2v) + con <- suppressWarnings(makeContrasts(contrasts = cov, levels = cnv)) + con + + rownames(con) <- colnames(design) + colnames(con) <- cc + con + + }) + + contrasts <- do.call(cbind, all_contrasts) + contrasts + + return(contrasts) + +} + +#' Modified from Kolde et al in \code{RobustRankAggreg::rankMatrix()} +#' @param x list with ranked elements +.rankmatrix <- function(x){ + u = unique(c(x, recursive = TRUE)) + N = length(u) + + rmat = matrix(NA, nrow=length(u), ncol=length(x), + dimnames=list(u, names(x))) + + for (i in names(x)) { + + rmat[x[[i]], i] = seq(1, length(x[[i]])) + + } + + return(rmat) + +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..0d5bbfc --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +# SigGenes + +![CI](https://github.com/ATpoint/SigGenes/actions/workflows/ci.yml/badge.svg) + +## Installation + +```r +# Install Bioconductor +if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager") +BiocManager::install() + +# Install limma and circtools +to_install <- c("edgeR", "limma", "Matrix", "S4Vectors", "scuttle", "SummarizedExperiment", "SingleCellExperiment", "atpoint/circtools") +BiocManager::install(to_install) +``` diff --git a/SigGenes.Rproj b/SigGenes.Rproj new file mode 100644 index 0000000..b9255bc --- /dev/null +++ b/SigGenes.Rproj @@ -0,0 +1,20 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/inst/extdata/haemopedia_subset.rds b/inst/extdata/haemopedia_subset.rds new file mode 100755 index 0000000..bc825b4 Binary files /dev/null and b/inst/extdata/haemopedia_subset.rds differ diff --git a/man/create_signatures.Rd b/man/create_signatures.Rd new file mode 100644 index 0000000..b0554e1 --- /dev/null +++ b/man/create_signatures.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/signatures.R +\name{create_signatures} +\alias{create_signatures} +\title{Create group-specific gene signatures based on pairwise differential analysis results} +\usage{ +create_signatures( + x, + n = Inf, + min_prop = 1, + exclude_groups = NULL, + signif_column = "adj.P.Val", + signif_threshold = 0.05, + effect_column = "logFC", + effect_threshold = 0, + rank_column = "P.Value", + rank_method = "increasing", + gene_column = "Gene" +) +} +\arguments{ +\item{x}{List with DE results from \code{de_limma} run with \code{mode="pairwise"}} + +\item{n}{The maximum number of top signature genes per group to return} + +\item{min_prop}{A number greater zero and smaller or equal than 1. Defines the stringency of the signature. It is the proportion +of groups that a gene needs to be overexpressed to. A value of 1 means a gene must be overexpressed versus all other groups. A value +of 0.75 means overexpressed versus 75\% of other groups.} + +\item{exclude_groups}{Vector with group names to exclude from signature creation. Can be helpful if a group is expected to be transitional between two other groups +so one would not expect unique genes defining this group. Can help returning more genes for the remaining groups.} + +\item{signif_column}{Name of the significance column in \code{x}.} + +\item{signif_threshold}{Threshold for significance.} + +\item{effect_column}{Name of the effect column in \code{x}.} + +\item{effect_threshold}{Threshold for effect size \(below this is "underexpressed", above is "overexpressed"\)} + +\item{rank_column}{Name of the column in \code{x} to use for ranking.} + +\item{rank_method}{Either "decreasing" or "increasing" ranking direction} + +\item{gene_column}{Name of the gene column in \code{x}.} +} +\description{ +Create group-specific gene signatures based on pairwise differential analysis results +} +\examples{ +set.seed(1) +sce <- scuttle::mockSCE(ncells=1000, ngenes=1000, nspikes=0) +sce$group <- rep(LETTERS[1:4], each=250) +sce$donor <- rep(LETTERS[1:4], 250) +res_pairwise <- de_limma(x = sce, aggregate_by = c("group", "donor"), + main_covariate = "group", other_covariates = c("donor")) +create_signatures <- create_signatures(x = res_pairwise, signif_threshold = .9) +create_signatures + +} +\author{ +Alexander Bender +} diff --git a/man/de_limma.Rd b/man/de_limma.Rd new file mode 100644 index 0000000..1bb121d --- /dev/null +++ b/man/de_limma.Rd @@ -0,0 +1,99 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/de_testing.R +\name{de_limma} +\alias{de_limma} +\title{Differential expression analysis on single-cell or pseudobulk level using limma-voom.} +\usage{ +de_limma( + x, + use_assay = "counts", + aggregate_by = NULL, + use_existing_sf = TRUE, + main_covariate, + other_covariates = NULL, + block = NULL, + min_pct = 0, + min_fc = 1, + use_weights = FALSE, + mode = c("pairwise", "average"), + use_filterByExpr = FALSE, + return_object = FALSE, + delim = "_vs_" +) +} +\arguments{ +\item{x}{A SingleCellExperiment} + +\item{use_assay}{Name of the assay representing the raw counts} + +\item{aggregate_by}{Vector with names of the colData columns to aggregate by. If NULL then +no aggregation is done prior to DE analysis.} + +\item{use_existing_sf}{Logical, whether to use the size factors from \code{sizeFactors(x)} for normalization. +Only applies if no pseudobulk aggregation is performed. If FALSE and no pseudobulk aggregation is performed then +normalization is done via the \code{calcNormFactors} function from `edgeR`. +The default is TRUE as one usually does DE analysis on single-cell level after preprocessing and cluster, so normalization +usually has already been performed. If TRUE and no size factors can be found will run library size normalization with the +\code{librarySizeFactors} function from `scuttle`.} + +\item{main_covariate}{Name of the main covariate for the testing.} + +\item{other_covariates}{Vector with other covariate names to adjust for in DE analysis.} + +\item{block}{Name of a factor to block for.} + +\item{min_pct}{The minimum percentage of cells in at least one of the two group levels per contrast expressing the gene (expressed means counts above 0).} + +\item{min_fc}{Minimum fold change to test against via \code{limma::treat()}.} + +\item{use_weights}{Logical, whether to use empirical sample weights in \code{edgeR::voomLmFit}} + +\item{mode}{Either "pairwise" to run all pairwise comparisons between `main_covariate` levels, +or "average" to test each level against the average of all other levels.} + +\item{use_filterByExpr}{Logical, whether to run \code{filterByExpr} from `edgeR` for prefiltering of counts. +This makes sense when testing for example bulk RNA-seq data.} + +\item{return_object}{Logical, whether to return the DGEList in the output} + +\item{delim}{A delimiter string used in the names of the output list.} +} +\description{ +Function accepts a SingleCellExperiment and expects an assay "counts" representing raw counts. +It then performs DE analysis forming all possible contrasts with limma-voom \(Law et al. 2014\), using the \code{voomLmFit} function from the `edgeR` package. +It is similar to running \code{voom} followed by \(optionally\) \code{duplicateCorrelation} and \code{lmFit}, for details see the details section in \code{edgeR::voomLmFit}. +DE testing is done by either contrasting all groupwise combinations, or by testing each group level vs the average of all other group levels. +The function can optionally aggregate single-cell data into pseudobulks. The function expects one group +to be the main covariate for testing and allows an arbitrary number of other covariates to be adjusted for. Alternatively, the user can specify a blocking +factor, for example to account for repeated measures. +} +\details{ +The output is a list with DE stats per contrast. All possible contrasts are displayed, for example for groups A/B/C it's A-B, A-C, B-C, B-A, C-A, C-B. The +individual results for identical contrasts e.g. A-B / B-A can differ if \code{min_pct} is above 0, as each list is filtered that the non-reference expresses +the gene with more than \code{min_pct} of cells. If {min_pct} is 0 then these two lists would be the same. +} +\examples{ +set.seed(1) +sce <- scuttle::mockSCE(ncells=1000, ngenes=1000, nspikes=0) +sce$group <- rep(LETTERS[1:4], each=250) +sce$donor <- rep(LETTERS[1:4], 250) +res_pairwise <- de_limma(x = sce, aggregate_by = c("group", "donor"), + main_covariate = "group", other_covariates = c("donor")) +res_pairwise_block <- de_limma(x = sce, aggregate_by = c("group", "donor"), + main_covariate = "group", block = "donor") +res_average <- de_limma(x = sce, aggregate_by = c("group", "donor"), + main_covariate = "group", other_covariates = c("donor"), + mode="average") + +} +\references{ +Law, CW, Chen, Y, Shi, W, Smyth, GK (2014). +Voom: precision weights unlock linear model analysis tools for RNA-seq read counts. Genome Biology 15, R29. +\url{doi:10.1186/gb-2014-15-2-r29} +} +\seealso{ +\code{\link{voom}} +} +\author{ +Alexander Bender +} diff --git a/man/dot-make_contrasts_average.Rd b/man/dot-make_contrasts_average.Rd new file mode 100644 index 0000000..636e9cd --- /dev/null +++ b/man/dot-make_contrasts_average.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.make_contrasts_average} +\alias{.make_contrasts_average} +\title{Makes all averaging contrasts based on an intercept-less design} +\usage{ +.make_contrasts_average(u, design) +} +\description{ +Makes all averaging contrasts based on an intercept-less design +} +\keyword{internal} diff --git a/man/dot-make_contrasts_pairwise.Rd b/man/dot-make_contrasts_pairwise.Rd new file mode 100644 index 0000000..17ddd5a --- /dev/null +++ b/man/dot-make_contrasts_pairwise.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.make_contrasts_pairwise} +\alias{.make_contrasts_pairwise} +\title{Makes all pairwise contrasts based on an intercept-less design} +\usage{ +.make_contrasts_pairwise(u, design, delim) +} +\description{ +Makes all pairwise contrasts based on an intercept-less design +} +\keyword{internal} diff --git a/man/dot-rankmatrix.Rd b/man/dot-rankmatrix.Rd new file mode 100644 index 0000000..da7f53d --- /dev/null +++ b/man/dot-rankmatrix.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.rankmatrix} +\alias{.rankmatrix} +\title{Modified from Kolde et al in \code{RobustRankAggreg::rankMatrix()}} +\usage{ +.rankmatrix(x) +} +\arguments{ +\item{x}{list with ranked elements} +} +\description{ +Modified from Kolde et al in \code{RobustRankAggreg::rankMatrix()} +} diff --git a/man/get_pexpr.Rd b/man/get_pexpr.Rd new file mode 100644 index 0000000..f19fc9a --- /dev/null +++ b/man/get_pexpr.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_pexpr} +\alias{get_pexpr} +\title{Calculate percentage of expression per group for each gene.} +\usage{ +get_pexpr(data, group, threshold = 0, digits = 2) +} +\arguments{ +\item{data}{Numeric data.frame, matrix or similar with rows being genes/observations and columns being cells/samples.} + +\item{group}{Vector of length \code{ncol(data)} with group information} + +\item{threshold}{Consider counts above this threshold as "expressed"} + +\item{digits}{Round percentage to this digit} +} +\description{ +Calculate percentage of expression per group for each gene. +} +\examples{ +ngenes <- 10 +nsamples <- 1000 +set.seed(1) +data <- matrix(rnbinom(ngenes*nsamples, mu=0.5, nsamples), ncol=nsamples, byrow=TRUE) +rownames(data) <- paste0("gene", 1:nrow(data)) +group <- rep(LETTERS[1:10], each=nsamples / 10) +get_pexpr(data, group) + +# Also works on Csparse matrix +data_sparse <- as(data, "CsparseMatrix") +get_pexpr(data_sparse, group) + +} +\author{ +Alexander Bender +} diff --git a/man/rank_degs.Rd b/man/rank_degs.Rd new file mode 100644 index 0000000..da8f27c --- /dev/null +++ b/man/rank_degs.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/signatures.R +\name{rank_degs} +\alias{rank_degs} +\title{Internal function to filter and rank DEG results} +\usage{ +rank_degs( + res, + delim, + signif_column, + signif_threshold, + effect_column, + effect_threshold, + rank_column, + rank_method, + gene_column, + min_pct +) +} +\arguments{ +\item{res}{The output of \code{de_limma} run with \code{mode="pairwise"}} + +\item{signif_column}{Name of the significance column in \code{x}.} + +\item{signif_threshold}{Threshold for significance.} + +\item{effect_column}{Name of the effect column in \code{x}.} + +\item{effect_threshold}{Threshold for effect size \(below this is "underexpressed", above is "overexpressed"\)} + +\item{rank_column}{Name of the column in \code{x} to use for ranking.} + +\item{rank_method}{Either "decreasing" or "increasing" ranking direction} + +\item{gene_column}{Name of the gene column in \code{x}.} +} +\description{ +Internal function to filter and rank DEG results +} +\author{ +Alexander Bender +} +\keyword{internal} diff --git a/man/ranks2signatures.Rd b/man/ranks2signatures.Rd new file mode 100644 index 0000000..1854f97 --- /dev/null +++ b/man/ranks2signatures.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/signatures.R +\name{ranks2signatures} +\alias{ranks2signatures} +\title{Convert ranks to signatures} +\usage{ +ranks2signatures(ranked, min_prop, n, exclude_groups) +} +\arguments{ +\item{ranked}{The output of \code{rank_degs}} + +\item{min_prop}{A number greater zero and smaller or equal than 1. Defines the stringency of the signature. It is the proportion +of groups that a gene needs to be overexpressed to. A value of 1 means a gene must be overexpressed versus all other groups. A value +of 0.75 means overexpressed versus 75\% of other groups.} + +\item{n}{The maximum number of top signature genes per group to return} + +\item{exclude_groups}{Vector with group names to exclude from signature creation. Can be helpful if a group is expected to be transitional between two other groups +so one would not expect unique genes defining this group. Can help returning more genes for the remaining groups.} +} +\description{ +Convert ranks to signatures +} +\author{ +Alexander Bender +} +\keyword{internal} diff --git a/misc/demo.R b/misc/demo.R new file mode 100644 index 0000000..98c96ce --- /dev/null +++ b/misc/demo.R @@ -0,0 +1,56 @@ +#library(SigGenes) + +# Load RNA-seq data for CD4T-, CD8T and naive B cells from Haemopedia: +#counts <- readRDS(paste0( +# system.file("extdata",package="CreateGeneSignatures"), +# "/haemopedia_subset.rds")) + +counts <- readRDS("inst/extdata/haemopedia_subset.rds") + +library(SingleCellExperiment) +coldata <- DataFrame(group = gsub("\\..", "", colnames(counts)), row.names = colnames(counts), check.names = FALSE) +sce <- SingleCellExperiment(assays=list(counts = counts), colData = coldata) + +# Pairwise Mode +de <- de_limma(x = sce, use_assay = "counts", aggregate_by = NULL, + main_covariate = "group", min_fc = 1.5, use_existing_sf = FALSE, return_object = TRUE, + other_covariates = NULL, block = NULL, + min_pct = 0, use_weights = FALSE, + mode = "pairwise", delim = "_vs_", + use_filterByExpr = FALSE) + +signatures <- create_signatures(de, min_prop = 1, n = Inf) + +lengths(signatures) + +library(pheatmap) +logcpm <- log2(edgeR::cpm(de$DGEList,log=FALSE)+1) + +col_order <- unlist(lapply(names(signatures), function(x) which(de$DGEList$samples$group %in% x))) + +logcpmZ <- t(scale(t(logcpm[unique(unlist(signatures)),]))) +pheatmap(mat=logcpmZ[,col_order], + show_rownames=FALSE, cluster_rows=FALSE, cluster_cols=FALSE) + +# Average Mode +de2 <- de_voom(x = sce, use_assay = "counts", aggregate_by = NULL, + main_covariate = "group", min_fc = 1.5, use_existing_sf = FALSE, return_object = FALSE, + other_covariates = NULL, block = NULL, + min_pct = 0, use_weights = FALSE, + mode = "average", delim = "_vs_", + use_filterByExpr = FALSE) + +signatures2 <- sapply(de2, function(x){ + + xy <- x[x$logFC > 0 & x$adj.P.Val < 0.05,] + xy <- xy[order(xy$t, decreasing = TRUE),] + head(rownames(xy), 100) + +}, simplify = FALSE) + +lengths(signatures2) + +logcpmZ2 <- t(scale(t(logcpm[unique(unlist(signatures2)),]))) + +pheatmap(mat=logcpmZ2[,col_order], + show_rownames=FALSE, cluster_rows=FALSE, cluster_cols=FALSE) diff --git a/tests/testthat/all_tests.R b/tests/testthat/all_tests.R new file mode 100644 index 0000000..05e3484 --- /dev/null +++ b/tests/testthat/all_tests.R @@ -0,0 +1,83 @@ +# testthat::test_file("tests/testthat/all_tests.R") + +test_that("de_testing works", { + + set.seed(1) + sce <- scuttle::mockSCE(ncells=100, ngenes=11, nspikes=0) + sce$group <- rep(LETTERS[1:4], each=25) + sce$donor <- rep(LETTERS[1:4], 25) + + res1 <- de_limma(x = sce, aggregate_by = c("group", "donor"), main_covariate = "group", other_covariates = c("donor"), return_object = TRUE) + expect_true(is(res1, "SimpleList")) + expect_true(is(res1$results, "SimpleList")) + expect_true(is(res1$results$A_vs_B, "data.frame")) + expect_true(is(res1$DGEList, "DGEList")) + expect_true(is(res1$params, "list")) + rm(res1) + + res2 <- de_limma(x = sce, aggregate_by = c("group", "donor"), main_covariate = "group", other_covariates = c("donor"), return_object = TRUE, mode = "average") + expect_true(is(res2, "SimpleList")) + expect_true(is(res2$results, "SimpleList")) + expect_true(is(res2$results$A, "data.frame")) + expect_true(is(res2$DGEList, "DGEList")) + expect_true(is(res2$params, "list")) + rm(res2) + + expect_error(de_limma(x = sce, main_covariate = "group", use_assay = "not_existing")) + expect_error(de_limma(x = sce, main_covariate = "group", block = "not_existing")) + expect_error(de_limma(x = sce, other_covariates = "g")) + expect_error(de_limma(x = sce, aggregate_by = "g")) + expect_error(de_limma(x = assay(sce), aggregate_by = "g")) + expect_error(de_limma(x = assay(sce), min_pct = -1)) + expect_error(de_limma(x = assay(sce), min_fc = -1)) + expect_error(de_limma(x = assay(sce), mode = "3")) + +}) + +test_that("create_signatures works", { + + set.seed(1) + sce <- scuttle::mockSCE(ncells=100, ngenes=11, nspikes=0) + sce$group <- rep(LETTERS[1:4], each=25) + sce$donor <- rep(LETTERS[1:4], 25) + + res1 <- de_limma(x = sce, aggregate_by = c("group", "donor"), main_covariate = "group", other_covariates = c("donor"), return_object = TRUE) + sig <- create_signatures(res1, signif_threshold = .9) + expect_true(is(sig, "list")) + + res_backup <- res1 + res1$params <- NULL + expect_error(create_signatures(res1)) + + res1 <- res_backup + res1$results <- NULL + expect_error(create_signatures(res1)) + + res1 <- res_backup + res1$params$min_pct <- NULL + expect_error(create_signatures(res1)) + + cn <- colnames(res1$results$A_vs_B) + res1 <- res_backup + colnames(res1$results$A_vs_B)[which(cn=="logFC")] <- "x" + expect_error(create_signatures(res1)) + + res1 <- res_backup + colnames(res1$results$A_vs_B)[which(cn=="adj.P.Val")] <- "x" + expect_error(create_signatures(res1)) + + res1 <- res_backup + colnames(res1$results$A_vs_B)[which(cn=="P.Value")] <- "x" + expect_error(create_signatures(res1)) + + res1 <- res_backup + colnames(res1$results$A_vs_B)[which(cn=="Gene")] <- "x" + expect_error(create_signatures(res1)) + + expect_error(create_signatures(res_backup, min_prop = 2)) + expect_error(create_signatures(res_backup, n = -1)) + expect_error(create_signatures(res_backup, exclude_groups = "not_existing")) + +}) + +