diff --git a/.Rbuildignore b/.Rbuildignore
new file mode 100644
index 0000000..91114bf
--- /dev/null
+++ b/.Rbuildignore
@@ -0,0 +1,2 @@
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..f160a66
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,31 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - '**'
+    paths-ignore:
+      - '*.md'
+      - '*.MD'
+      - '*.ignore'
+      - LICENSE
+
+jobs:
+
+  SigGenes:
+
+    runs-on: ubuntu-latest
+
+    steps:
+
+      - uses: actions/checkout@v4
+
+      # Run everything via the Bioconductor docker image as it has almost all dependencies preinstalled
+      - name: devtools-check-docker
+        run: |
+          bioc_install='BiocManager::install(c("edgeR", "limma", "Matrix", "S4Vectors", "scuttle", "SummarizedExperiment", "SingleCellExperiment"))'
+          dev_check='devtools::check("/SigGenes/")'
+          dev_doc='devtools::document("/SigGenes/")'
+          testthat='testthat::test_file("/SigGenes/tests/testthat/all_tests.R")'
+          docker run -v "$(pwd)":"/SigGenes/" bioconductor/bioconductor_docker:RELEASE_3_18 Rscript --vanilla -e "${bioc_install}; ${dev_check}; ${dev_doc}; ${testthat}"
+          docker run -v "$(pwd)":"/SigGenes/" bioconductor/bioconductor_docker:RELEASE_3_19 Rscript --vanilla -e "${bioc_install}; ${dev_check}; ${dev_doc}; ${testthat}"
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9a85036
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+..R*
+gitHeadInfo.gin
+R/.Rapp.history
+*.DS_*
+.Rproj.user
+.Rhistory
+~*
+*.tar.gz
+.Rproj.user
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..cfb60b6
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,24 @@
+Package: SigGenes
+Title: Automated differential analysis and signature generation for bulk and single-cell data
+Description: Automated differential analysis between groups based on limma-voom and generation of per-group signatures via custom filtering.
+Version: 1.0.0
+Author: Alexander Bender [aut,cre]
+Maintainer: Alexander Bender <atpoint90@gmail.com>
+License: LGPL (>=2)
+Encoding: UTF-8
+Depends:
+  R (>= 4.0),
+Imports: 
+  edgeR, 
+  limma,
+  Matrix,
+  methods,
+  S4Vectors, 
+  scuttle, 
+  SingleCellExperiment,
+  SummarizedExperiment
+Suggests:
+  testthat
+URL: https://github.com/atpoint/SigGenes
+BugReports: https://github.com/atpoint/SigGenes/issues
+RoxygenNote: 7.2.3
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..0e88592
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,28 @@
+# Generated by roxygen2: do not edit by hand
+
+export(create_signatures)
+export(de_limma)
+export(get_pexpr)
+importFrom(Matrix,t)
+importFrom(S4Vectors,SimpleList)
+importFrom(SingleCellExperiment,"sizeFactors<-")
+importFrom(SingleCellExperiment,colData)
+importFrom(SingleCellExperiment,sizeFactors)
+importFrom(SummarizedExperiment,assay)
+importFrom(SummarizedExperiment,assayNames)
+importFrom(SummarizedExperiment,colData)
+importFrom(edgeR,DGEList)
+importFrom(edgeR,calcNormFactors)
+importFrom(edgeR,filterByExpr)
+importFrom(edgeR,voomLmFit)
+importFrom(limma,contrasts.fit)
+importFrom(limma,is.fullrank)
+importFrom(limma,makeContrasts)
+importFrom(limma,topTreat)
+importFrom(limma,treat)
+importFrom(methods,as)
+importFrom(methods,is)
+importFrom(scuttle,aggregateAcrossCells)
+importFrom(scuttle,librarySizeFactors)
+importFrom(stats,model.matrix)
+importFrom(utils,combn)
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..8f10d51
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,4 @@
+CHANGES IN VERSION 1.0.0
+------------------------
+
+    o Initial commit
diff --git a/R/de_testing.R b/R/de_testing.R
new file mode 100644
index 0000000..06f4c0a
--- /dev/null
+++ b/R/de_testing.R
@@ -0,0 +1,298 @@
+#' Differential expression analysis on single-cell or pseudobulk level using limma-voom.
+#'
+#' Function accepts a SingleCellExperiment and expects an assay "counts" representing raw counts.
+#' It then performs DE analysis forming all possible contrasts with limma-voom \(Law et al. 2014\), using the \code{voomLmFit} function from the `edgeR` package.
+#' It is similar to running \code{voom} followed by \(optionally\) \code{duplicateCorrelation} and \code{lmFit}, for details see the details section in \code{edgeR::voomLmFit}.
+#' DE testing is done by either contrasting all groupwise combinations, or by testing each group level vs the average of all other group levels.
+#' The function can optionally aggregate single-cell data into pseudobulks. The function expects one group
+#' to be the main covariate for testing and allows an arbitrary number of other covariates to be adjusted for. Alternatively, the user can specify a blocking
+#' factor, for example to account for repeated measures.
+#'
+#' The output is a list with DE stats per contrast. All possible contrasts are displayed, for example for groups A/B/C it's A-B, A-C, B-C, B-A, C-A, C-B. The
+#' individual results for identical contrasts e.g. A-B / B-A can differ if \code{min_pct} is above 0, as each list is filtered that the non-reference expresses
+#' the gene with more than \code{min_pct} of cells. If {min_pct} is 0 then these two lists would be the same.
+#'
+#'
+#' @param x A SingleCellExperiment
+#' @param use_assay Name of the assay representing the raw counts
+#' @param aggregate_by Vector with names of the colData columns to aggregate by. If NULL then
+#'   no aggregation is done prior to DE analysis.
+#' @param use_existing_sf Logical, whether to use the size factors from \code{sizeFactors(x)} for normalization.
+#'   Only applies if no pseudobulk aggregation is performed. If FALSE and no pseudobulk aggregation is performed then
+#'   normalization is done via the \code{calcNormFactors} function from `edgeR`.
+#'   The default is TRUE as one usually does DE analysis on single-cell level after preprocessing and cluster, so normalization
+#'   usually has already been performed. If TRUE and no size factors can be found will run library size normalization with the
+#'   \code{librarySizeFactors} function from `scuttle`.
+#' @param main_covariate Name of the main covariate for the testing.
+#' @param other_covariates Vector with other covariate names to adjust for in DE analysis.
+#' @param block Name of a factor to block for.
+#' @param min_pct The minimum percentage of cells in at least one of the two group levels per contrast expressing the gene (expressed means counts above 0).
+#' @param min_fc Minimum fold change to test against via \code{limma::treat()}.
+#' @param use_weights Logical, whether to use empirical sample weights in \code{edgeR::voomLmFit}
+#' @param mode Either "pairwise" to run all pairwise comparisons between `main_covariate` levels,
+#'   or "average" to test each level against the average of all other levels.
+#' @param use_filterByExpr Logical, whether to run \code{filterByExpr} from `edgeR` for prefiltering of counts.
+#'   This makes sense when testing for example bulk RNA-seq data.
+#' @param return_object Logical, whether to return the DGEList in the output
+#' @param delim A delimiter string used in the names of the output list.
+#'
+#' @examples
+#' set.seed(1)
+#' sce <- scuttle::mockSCE(ncells=1000, ngenes=1000, nspikes=0)
+#' sce$group <- rep(LETTERS[1:4], each=250)
+#' sce$donor <- rep(LETTERS[1:4], 250)
+#' res_pairwise <- de_limma(x = sce, aggregate_by = c("group", "donor"),
+#'                          main_covariate = "group", other_covariates = c("donor"))
+#' res_pairwise_block <- de_limma(x = sce, aggregate_by = c("group", "donor"),
+#'                                main_covariate = "group", block = "donor")
+#' res_average <- de_limma(x = sce, aggregate_by = c("group", "donor"),
+#'                         main_covariate = "group", other_covariates = c("donor"),
+#'                         mode="average")
+#'
+#' @author Alexander Bender
+#'
+#' @references
+#' Law, CW, Chen, Y, Shi, W, Smyth, GK (2014).
+#' Voom: precision weights unlock linear model analysis tools for RNA-seq read counts. Genome Biology 15, R29.
+#' \url{doi:10.1186/gb-2014-15-2-r29}
+#'
+#' @seealso \code{\link{voom}}
+#'
+#' @importFrom SingleCellExperiment colData sizeFactors sizeFactors<-
+#' @importFrom SummarizedExperiment assay assayNames colData
+#' @importFrom scuttle aggregateAcrossCells librarySizeFactors
+#' @importFrom edgeR calcNormFactors DGEList filterByExpr voomLmFit
+#' @importFrom stats model.matrix
+#' @importFrom limma contrasts.fit is.fullrank makeContrasts treat topTreat
+#' @importFrom utils combn
+#' @importFrom S4Vectors SimpleList
+#' @importFrom methods as is
+#'
+#' @export
+de_limma <- function(x, use_assay = "counts", aggregate_by=NULL, use_existing_sf = TRUE,
+                     main_covariate, other_covariates=NULL, block=NULL,
+                     min_pct=0, min_fc = 1.0, use_weights = FALSE,
+                     mode=c("pairwise", "average"),
+                     use_filterByExpr = FALSE, return_object = FALSE, delim="_vs_"){
+
+  # Checks
+  is_sce <- is(x, "SingleCellExperiment")
+  if(!is_sce) stop("x must be a SingleCellExperiment")
+
+  if(!use_assay %in% assayNames(x))
+    stop(paste("x contains no assay named", use_assay))
+
+  if(!is.null(aggregate_by)){
+    is_agg <- aggregate_by %in% colnames(colData(x))
+    if(!sum(is_agg) == length(aggregate_by)) stop("Not all entries in <aggregate_by> are in colData(x)")
+  }
+
+  if(!is.null(block)){
+    if(!block %in% colnames(colData(x)))
+      stop("Name of block not in colData(x)")
+  }
+
+  if(!is(use_existing_sf, "logical"))
+    stop("use_existing_sf must be logical")
+
+  is_main <- main_covariate %in% colnames(colData(x))
+  if(!is_main) stop("Not all entries in <main_covariate> are in colData(x)")
+
+  if(!is.null(other_covariates)){
+    is_other <- other_covariates %in% colnames(colData(x))
+    if(!is_other) stop("Not all entries in <other_covariates> are in colData(x)")
+  }
+
+  is_pct <- min_pct >= 0 & is.numeric(min_pct)
+  if(!is_pct) stop("min_pct must be numeric and not negative")
+
+  is_fc <- min_fc >= 1.0 & is.numeric(min_fc)
+  if(!is_fc) stop("is_fc must be numeric and >= 1.0")
+
+  if(!is.logical(use_weights)) stop("use_weights must be logical")
+
+  invisible(match.arg(mode, c("pairwise", "average")))
+  mode <- match.arg(mode)
+
+  if(!is(delim, "character")) stop("delim must be a charcter")
+
+  # Aggregate to pseudobulk
+  if(!is.null(aggregate_by)){
+
+    pb <- aggregateAcrossCells(x, id=colData(x)[,aggregate_by], use.assay.type="counts")
+    pb <- DGEList(counts = assay(pb), samples = data.frame(colData(pb), check.names = FALSE))
+    run_norm <- TRUE
+
+  } else {
+
+    w <- which(assayNames(x) == "counts")
+    pb <- DGEList(counts = assay(x, use_assay), samples = data.frame(colData(x), check.names = FALSE))
+    sf <- suppressWarnings(sizeFactors(x))
+    # scran::convertTo()
+    if (!is.null(sf)) {
+      nf <- log(sf/y$samples$lib.size)
+      nf <- exp(nf - mean(nf))
+      y$samples$norm.factors <- nf
+    }
+
+    if(use_existing_sf){
+
+      if(is.null(sizeFactors(x))){
+
+        sf <- librarySizeFactors(x)
+        sizeFactors(x) <- sf
+        run_norm <- FALSE
+
+      }
+
+      run_norm <- FALSE
+
+    } else {
+
+      run_norm <- TRUE
+
+    }
+
+  }
+
+  pb$samples <- droplevels.data.frame(pb$samples)
+  pb$genes <- NULL
+
+  # Require replication in at least one group
+  max_n <- max(table(pb$samples[[main_covariate]]))
+  if(max_n == 1) stop("No replication in any group -- DE analysis not possible!")
+
+  # Design, allowing any additional covariates
+  others <- if(!is.null(other_covariates)) paste0(" + ", other_covariates) else ""
+  f <- as.formula(paste0("~ 0 + ", main_covariate, others))
+  design <- model.matrix(f, pb$samples)
+  colnames(design) <- gsub(paste0("^", main_covariate), "", colnames(design))
+
+  is_fr <- is.fullrank(design)
+  if(!is_fr) stop("Design is not full-rank, meaning you cannot adjust for these covariates!")
+
+  # Optionally prefilter with edgeR
+  if(use_filterByExpr){
+
+    keep_fbe <- filterByExpr(pb, group = pb$samples[[main_covariate]])
+    pb <- pb[keep_fbe,]
+
+
+  }
+
+  # Prefilter to genes that in at least one group is expressed by, then normalize
+  pexp <- get_pexpr(assay(x, use_assay), group = x[[main_covariate]])
+
+  if(min_pct > 0){
+
+    keep <- apply(pexp >= min_pct, 1, sum) > 0
+    pb <- pb[keep,]
+
+  }
+
+  rm(x)
+
+  if(nrow(pb) == 0) return(NULL)
+  if(nrow(pb) < 10){
+    warning("Fewer than 10 genes (arbitrary threshold to trigger this warning) left after expression filter.")
+  }
+
+  # Only run normalization if doing pseudobulk analysis
+  if(run_norm){
+    pb <- calcNormFactors(pb, method = "TMMwsp")
+  }
+
+  # For now we do voomLmFit
+  if(is.null(block)){
+    blocker <- NULL
+  } else {
+    blocker <- pb$samples[[block]]
+  }
+
+  v <- voomLmFit(counts = pb, design = design, sample.weights = use_weights, block = blocker)
+
+  # Make all pairwise comparisons and make the contrasts.fit
+  ux <- pb$samples[[main_covariate]]
+  if(is(ux, "factor")){
+    u <- levels(droplevels(ux))
+  } else {
+    u <- unique(as.character(ux))
+  }
+
+  # -- Pairwise -- # -----------------------------------------------------------
+  if(mode=="pairwise"){
+
+    contrasts <- .make_contrasts_pairwise(u, design, delim)
+    v <- contrasts.fit(fit = v, contrasts = contrasts)
+    v <- treat(v, fc=min_fc)
+
+    # Extract results per contrast
+    iter <- colnames(contrasts)
+    de_results <- lapply(iter, function(i){
+
+      s <- strsplit(i, delim)[[1]]
+      first <- s[1]
+      second <- s[2]
+
+      tt <- topTreat(fit = v, coef = i, number=Inf)
+      tt <- tt[order(tt$t, decreasing = TRUE),]
+      pe <- pexp[rownames(tt), c(first, second)]
+      colnames(pe) <- c("pct.1", "pct.2")
+      tt <- cbind(tt, pe)
+
+      tt <- tt[tt$pct.1 >= min_pct | tt$pct.2 >= min_pct,]
+      tt$adj.P.Val <- p.adjust(tt$P.Value, "BH")
+      tt
+
+    })
+
+    names(de_results) <- iter
+
+  }
+
+  if(mode=="average"){
+
+    contrasts <- .make_contrasts_average(u, design)
+    v <- contrasts.fit(fit = v, contrasts = contrasts)
+    v <- treat(v, fc=min_fc)
+
+    # Extract significant genes per contrast
+    iter <- colnames(contrasts)
+    de_results <- lapply(iter, function(i){
+
+      tt <- topTreat(fit = v, coef = i, number=Inf)
+      tt <- tt[order(tt$t, decreasing = TRUE),]
+      pct.1 <- round(as.numeric(pexp[rownames(tt), i, drop=TRUE]), 2)
+      pct.2 <- round(as.numeric(apply(pexp[rownames(tt), setdiff(colnames(pexp), i), drop=FALSE], 1, mean)), 2)
+      tt$pct.1 <- pct.1
+      tt$pct.2 <- pct.2
+
+      tt <- tt[tt$pct.1 >= min_pct,]
+      tt$adj.P.Val <- p.adjust(tt$P.Value, "BH")
+      tt
+
+    })
+
+    names(de_results) <- iter
+
+  }
+
+  de_results <- sapply(de_results, function(x) data.frame(Gene = rownames(x), x), simplify = FALSE)
+  de_results <- as(de_results, "SimpleList")
+
+  to_return <- SimpleList(results = de_results)
+
+  if(return_object){
+
+    to_return[["DGEList"]] <- pb
+
+  }
+
+  to_return[["params"]] <- list()
+  to_return[["params"]]$mode <- mode
+  to_return[["params"]]$delim <- delim
+  to_return[["params"]]$min_pct <- min_pct
+
+  return(to_return)
+
+}
diff --git a/R/signatures.R b/R/signatures.R
new file mode 100644
index 0000000..8894ce9
--- /dev/null
+++ b/R/signatures.R
@@ -0,0 +1,258 @@
+#' Internal function to filter and rank DEG results
+#'
+#' @author Alexander Bender
+#'
+#' @param res The output of \code{de_limma} run with \code{mode="pairwise"}
+#'
+#' @inheritParams create_signatures
+#'
+#' @importFrom methods as is
+#'
+#' @keywords internal
+#'
+rank_degs <- function(res, delim,
+                      signif_column, signif_threshold,
+                      effect_column, effect_threshold,
+                      rank_column, rank_method,
+                      gene_column, min_pct){
+
+  if(!class(res) %in% c("list", "SimpleList") | is.null(names(res))){
+    stop("res must be a named list", call.=FALSE)
+  }
+
+  if(!all(grepl(delim, names(res)))) stop("delim was not found in all names of the res")
+
+  invisible(match.arg(arg=class(signif_threshold), choices="numeric"))
+  invisible(match.arg(arg=class(effect_threshold), choices="numeric"))
+
+  check.gf <- sum(unlist(lapply(res, function(x)
+    if(!gene_column %in% colnames(x)) return(1) else return(0))))
+  check.sf <- sum(unlist(lapply(res, function(x)
+    if(!signif_column %in% colnames(x)) return(1) else return(0))))
+  check.ef <- sum(unlist(lapply(res, function(x)
+    if(!effect_column %in% colnames(x)) return(1) else return(0))))
+  check.rk <- sum(unlist(lapply(res, function(x)
+    if(!rank_column %in% colnames(x)) return(1) else return(0))))
+
+  if(check.gf>0) stop("gene_column does not exist in all entries of res.list!")
+  if(check.sf>0) stop("signif_column does not exist in all entries of res.list!")
+  if(check.ef>0) stop("effect_column does not exist in all entries of res.list!")
+  if(check.rk>0) stop("rank_column does not exist in all entries of res.list!")
+
+  rank_method <- match.arg(rank_method, c("decreasing", "increasing"))
+
+  # Ranking
+  nm <- data.frame(t(as.data.frame(strsplit(names(res), delim), check.names = FALSE)), check.names = FALSE)
+  colnames(nm) <- c("first", "second")
+  rownames(nm) <- NULL
+  nm2 <- nm[,2:1]
+  nm$direction <- "forward"
+  nm2$direction <- "reverse"
+  colnames(nm2) <- colnames(nm)
+  nm <- rbind(nm, nm2)
+
+  u <- unique(c(nm$first, nm$second))
+
+  l <- sapply(u, function(f){
+
+    y <- nm[nm$first %in% f,]
+
+    y2 <- lapply(1:nrow(y), function(z){
+
+      yy <- y[z,,drop=FALSE]
+
+      if(yy$direction=="forward"){
+
+        current_contrast <- paste0(yy$first, delim, yy$second)
+
+        here <- res[[current_contrast]]
+        here <- here[here$pct.1 >= min_pct,]
+
+      } else {
+
+        current_contrast <- paste0(yy$second, delim, yy$first)
+        here <- res[[current_contrast]]
+        here[[effect_column]] <- here[[effect_column]] * -1
+        here <- here[here$pct.2 >= min_pct,]
+
+      }
+
+      h <- here[here[[signif_column]] < signif_threshold & here[[effect_column]] > effect_threshold,]
+      k <- if(rank_method=="decreasing") TRUE else FALSE
+      o <- order(h[[rank_column]], decreasing = k)
+      h[o,][[gene_column]]
+
+    })
+
+    names(y2) <- y$second
+    return(y2)
+
+
+  }, simplify = FALSE)
+
+  l <- as(l, "SimpleList")
+  return(l)
+
+}
+
+#' Convert ranks to signatures
+#'
+#' @param ranked The output of \code{rank_degs}
+#' @author Alexander Bender
+#' @keywords internal
+#' @inheritParams create_signatures
+#'
+ranks2signatures <- function(ranked, min_prop, n, exclude_groups){
+
+  if(!is.null(exclude_groups)){
+    if(!sum(exclude_groups %in% names(ranked))==length(exclude_groups))
+      stop("Specified exclude_groups does not exist as a group name")
+  }
+
+  if(is.null(names(ranked))) stop("ranked has no names")
+  if(min_prop > 1 | min_prop <= 0) stop("min_prop must be between >= 0 and <= 1")
+  if(is.finite(n)){
+    if(n <= 0 | (n %% 1 > 0)) stop("n must be an even number larger 0 or Inf")
+  }
+
+  # optionally remove exclude_groups from ranked
+  sdf <- setdiff(names(ranked), exclude_groups)
+  ranked <- ranked[sdf]
+  ranked <- sapply(names(ranked), function(x){
+
+    r <- ranked[[x]]
+    k <- intersect(names(r), sdf)
+    r[k]
+
+  }, simplify = FALSE)
+
+  nms <- names(ranked)
+  s <- sapply(nms, function(x){
+
+    genes_ranked <- ranked[[x]]
+    genes_ranked[[x]] <- NULL
+
+    # rank matrix, low values means high ranks and viceversa, 1 means not present in that pairwise list:
+    rmat <- .rankmatrix(genes_ranked)
+
+    # Get the genes that qualify as markers given the number of groups and the min_prop
+    isna <- !is.na(rmat)
+    from <- ncol(rmat)
+    to   <- floor(min_prop*from)
+    to <- if(to==0) 1 else to
+
+    med  <- function(y) median(y,na.rm=TRUE)
+
+    # take the rankmatrix (so each entry is the rank of the gene in the individual ranking),
+    # and then calculate the median of the ranks -- that is the final ranking metric
+    final <- lapply(from:to, function(n){
+
+      idx <- base::rowSums(isna)==n
+      if(sum(idx)==0) return(NULL)
+
+      rmat_x <- rmat[idx,,drop=FALSE]
+
+      # get the median of the ranks
+      mat <- apply(rmat_x, 1, med)
+
+      # rank by the median
+      o <- order(mat)
+
+      markers.df <- data.frame(rmat_x[o,,drop=FALSE], check.names=FALSE)
+
+      return(markers.df)
+
+    })
+
+    final <- do.call(rbind, final)
+
+    final[!is.na(final)] <- 1
+    final[is.na(final)]  <- 0
+    final <- head(final, n=n)
+    if(length(final) == 0){
+      final <- NULL
+    }
+
+    return(final)
+
+  },simplify = FALSE)
+
+  # Now, based on min_prop filter for groupwise signature genes
+  sigs <- lapply(names(s), function(x){
+
+    sx <- s[[x]]
+    if(is.null(sx)) return(NULL)
+
+    min_cols <- floor(min_prop * ncol(sx))
+
+    keep <- apply(sx == 1, 1, sum) >= min_cols
+    out <- rownames(sx[keep,,drop=FALSE])
+    out
+
+  })
+
+  names(sigs) <- names(s)
+
+  return(sigs)
+
+}
+
+#' Create group-specific gene signatures based on pairwise differential analysis results
+#'
+#' @param x List with DE results from \code{de_limma} run with \code{mode="pairwise"}
+#' @param n The maximum number of top signature genes per group to return
+#' @param min_prop A number greater zero and smaller or equal than 1. Defines the stringency of the signature. It is the proportion
+#'   of groups that a gene needs to be overexpressed to. A value of 1 means a gene must be overexpressed versus all other groups. A value
+#'   of 0.75 means overexpressed versus 75\% of other groups.
+#' @param exclude_groups Vector with group names to exclude from signature creation. Can be helpful if a group is expected to be transitional between two other groups
+#'   so one would not expect unique genes defining this group. Can help returning more genes for the remaining groups.
+#' @param signif_column Name of the significance column in \code{x}.
+#' @param signif_threshold Threshold for significance.
+#' @param effect_column Name of the effect column in \code{x}.
+#' @param effect_threshold Threshold for effect size \(below this is "underexpressed", above is "overexpressed"\)
+#' @param gene_column Name of the gene column in \code{x}.
+#' @param rank_column Name of the column in \code{x} to use for ranking.
+#' @param rank_method Either "decreasing" or "increasing" ranking direction
+#'
+#' @author Alexander Bender
+#'
+#' @examples
+#' set.seed(1)
+#' sce <- scuttle::mockSCE(ncells=1000, ngenes=1000, nspikes=0)
+#' sce$group <- rep(LETTERS[1:4], each=250)
+#' sce$donor <- rep(LETTERS[1:4], 250)
+#' res_pairwise <- de_limma(x = sce, aggregate_by = c("group", "donor"),
+#'                          main_covariate = "group", other_covariates = c("donor"))
+#' create_signatures <- create_signatures(x = res_pairwise, signif_threshold = .9)
+#' create_signatures
+#'
+#' @export
+create_signatures <- function(x, n=Inf, min_prop=1, exclude_groups=NULL,
+                              signif_column="adj.P.Val", signif_threshold=0.05,
+                              effect_column="logFC", effect_threshold=0,
+                              rank_column="P.Value", rank_method="increasing",
+                              gene_column="Gene"){
+
+  mode_found <- x$params$mode
+  delim_found <- x$params$delim
+  pct_found <- x$params$min_pct
+  results_found <- x$results
+
+  if(is.null(x$params)){
+    stop("This does not look like the (unmodified) output of de_limma!")
+  }
+
+  if(is.null(mode_found) | is.null(delim_found) | is.null(pct_found) | is.null(results_found)){
+    stop("This does not look like the (unmodified) output of de_limma!")
+  }
+
+  ranked <- rank_degs(res=x$results, delim=delim_found, signif_column=signif_column, signif_threshold=signif_threshold,
+                      effect_column=effect_column, effect_threshold=effect_threshold,
+                      rank_column=rank_column, rank_method=rank_method,
+                      gene_column=gene_column, min_pct=x$params$min_pct)
+
+  signatures <- ranks2signatures(ranked=ranked, n=n, min_prop=min_prop, exclude_groups=exclude_groups)
+
+  return(signatures)
+
+}
diff --git a/R/utils.R b/R/utils.R
new file mode 100644
index 0000000..7e14696
--- /dev/null
+++ b/R/utils.R
@@ -0,0 +1,122 @@
+#' Calculate percentage of expression per group for each gene.
+#'
+#'
+#' @param data Numeric data.frame, matrix or similar with rows being genes/observations and columns being cells/samples.
+#' @param group Vector of length \code{ncol(data)} with group information
+#' @param threshold Consider counts above this threshold as "expressed"
+#' @param digits Round percentage to this digit
+#'
+#' @examples
+#' ngenes <- 10
+#' nsamples <- 1000
+#' set.seed(1)
+#' data <- matrix(rnbinom(ngenes*nsamples, mu=0.5, nsamples), ncol=nsamples, byrow=TRUE)
+#' rownames(data) <- paste0("gene", 1:nrow(data))
+#' group <- rep(LETTERS[1:10], each=nsamples / 10)
+#' get_pexpr(data, group)
+#'
+#' # Also works on Csparse matrix
+#' data_sparse <- as(data, "CsparseMatrix")
+#' get_pexpr(data_sparse, group)
+#'
+#' @author Alexander Bender
+#'
+#' @importFrom Matrix t
+#'
+#' @export
+get_pexpr <- function(data, group, threshold=0, digits=2){
+
+  if(ncol(data)!=length(group)) stop("ncol(data) != length(group)")
+  if(!is.numeric(threshold) | threshold < 0) stop("threshold must be numeric and > 0")
+
+  datar <- (data>threshold) * 1
+  a <- rowsum(x=t(datar), group=group)
+  b <- as.numeric(table(group)[rownames(a)])
+  f <- round(100*t(apply(a, 2, function(x) x/b)), digits=digits)
+  f
+
+}
+
+#' Makes all pairwise contrasts based on an intercept-less design
+#' @importFrom utils combn
+#' @importFrom limma makeContrasts
+#' @keywords internal
+.make_contrasts_pairwise <- function(u, design, delim){
+
+  all_combinations <- combn(u, 2)
+
+  all_contrasts <- lapply(1:ncol(all_combinations), function(cc){
+
+    one <- all_combinations[1,cc,drop=TRUE]
+    two <- all_combinations[2,cc,drop=TRUE]
+
+    p_raw <- paste0(one, "-", two)
+    p_valid <- paste0(make.names(one), "-", make.names(two))
+    col_valid <- make.names(colnames(design))
+
+    co <- suppressWarnings(makeContrasts(contrasts = p_valid, levels=col_valid))
+    rownames(co) <- colnames(design)
+    colnames(co) <- paste0(one, delim, two)
+    co
+
+  })
+
+  contrasts <- do.call(cbind, all_contrasts)
+  colnames(contrasts) <- gsub("   -   ", delim, colnames(contrasts))
+
+  return(contrasts)
+
+}
+
+#' Makes all averaging contrasts based on an intercept-less design
+#' @importFrom limma makeContrasts
+#' @keywords internal
+.make_contrasts_average <- function(u, design){
+
+  all_contrasts <- lapply(u, function(cc){
+
+    # potentially invalid
+    one <- cc
+    two <- setdiff(u, cc)
+    two2 <- paste0("(", paste(two, collapse="+"), ") / ", length(two))
+
+    # valid
+    onev <- make.names(one)
+    twov <- make.names(two)
+    two2v <- paste0("(", paste(twov, collapse="+"), ") / ", length(twov))
+    cnv <- make.names(colnames(design))
+    cov <-paste0(onev, " - ", two2v)
+    con <- suppressWarnings(makeContrasts(contrasts = cov, levels = cnv))
+    con
+
+    rownames(con) <- colnames(design)
+    colnames(con) <- cc
+    con
+
+  })
+
+  contrasts <- do.call(cbind, all_contrasts)
+  contrasts
+
+  return(contrasts)
+
+}
+
+#' Modified from Kolde et al in \code{RobustRankAggreg::rankMatrix()}
+#' @param x list with ranked elements
+.rankmatrix <- function(x){
+  u = unique(c(x, recursive = TRUE))
+  N = length(u)
+
+  rmat = matrix(NA, nrow=length(u), ncol=length(x),
+                dimnames=list(u, names(x)))
+
+  for (i in names(x)) {
+
+    rmat[x[[i]], i] = seq(1, length(x[[i]]))
+
+  }
+
+  return(rmat)
+
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0d5bbfc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+# SigGenes
+  
+![CI](https://github.com/ATpoint/SigGenes/actions/workflows/ci.yml/badge.svg)
+  
+## Installation
+
+```r
+# Install Bioconductor
+if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager")
+BiocManager::install()
+
+# Install limma and circtools
+to_install <- c("edgeR", "limma", "Matrix", "S4Vectors", "scuttle", "SummarizedExperiment", "SingleCellExperiment", "atpoint/circtools")
+BiocManager::install(to_install)
+```
diff --git a/SigGenes.Rproj b/SigGenes.Rproj
new file mode 100644
index 0000000..b9255bc
--- /dev/null
+++ b/SigGenes.Rproj
@@ -0,0 +1,20 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/inst/extdata/haemopedia_subset.rds b/inst/extdata/haemopedia_subset.rds
new file mode 100755
index 0000000..bc825b4
Binary files /dev/null and b/inst/extdata/haemopedia_subset.rds differ
diff --git a/man/create_signatures.Rd b/man/create_signatures.Rd
new file mode 100644
index 0000000..b0554e1
--- /dev/null
+++ b/man/create_signatures.Rd
@@ -0,0 +1,63 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/signatures.R
+\name{create_signatures}
+\alias{create_signatures}
+\title{Create group-specific gene signatures based on pairwise differential analysis results}
+\usage{
+create_signatures(
+  x,
+  n = Inf,
+  min_prop = 1,
+  exclude_groups = NULL,
+  signif_column = "adj.P.Val",
+  signif_threshold = 0.05,
+  effect_column = "logFC",
+  effect_threshold = 0,
+  rank_column = "P.Value",
+  rank_method = "increasing",
+  gene_column = "Gene"
+)
+}
+\arguments{
+\item{x}{List with DE results from \code{de_limma} run with \code{mode="pairwise"}}
+
+\item{n}{The maximum number of top signature genes per group to return}
+
+\item{min_prop}{A number greater zero and smaller or equal than 1. Defines the stringency of the signature. It is the proportion
+of groups that a gene needs to be overexpressed to. A value of 1 means a gene must be overexpressed versus all other groups. A value
+of 0.75 means overexpressed versus 75\% of other groups.}
+
+\item{exclude_groups}{Vector with group names to exclude from signature creation. Can be helpful if a group is expected to be transitional between two other groups
+so one would not expect unique genes defining this group. Can help returning more genes for the remaining groups.}
+
+\item{signif_column}{Name of the significance column in \code{x}.}
+
+\item{signif_threshold}{Threshold for significance.}
+
+\item{effect_column}{Name of the effect column in \code{x}.}
+
+\item{effect_threshold}{Threshold for effect size \(below this is "underexpressed", above is "overexpressed"\)}
+
+\item{rank_column}{Name of the column in \code{x} to use for ranking.}
+
+\item{rank_method}{Either "decreasing" or "increasing" ranking direction}
+
+\item{gene_column}{Name of the gene column in \code{x}.}
+}
+\description{
+Create group-specific gene signatures based on pairwise differential analysis results
+}
+\examples{
+set.seed(1)
+sce <- scuttle::mockSCE(ncells=1000, ngenes=1000, nspikes=0)
+sce$group <- rep(LETTERS[1:4], each=250)
+sce$donor <- rep(LETTERS[1:4], 250)
+res_pairwise <- de_limma(x = sce, aggregate_by = c("group", "donor"),
+                         main_covariate = "group", other_covariates = c("donor"))
+create_signatures <- create_signatures(x = res_pairwise, signif_threshold = .9)
+create_signatures
+
+}
+\author{
+Alexander Bender
+}
diff --git a/man/de_limma.Rd b/man/de_limma.Rd
new file mode 100644
index 0000000..1bb121d
--- /dev/null
+++ b/man/de_limma.Rd
@@ -0,0 +1,99 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/de_testing.R
+\name{de_limma}
+\alias{de_limma}
+\title{Differential expression analysis on single-cell or pseudobulk level using limma-voom.}
+\usage{
+de_limma(
+  x,
+  use_assay = "counts",
+  aggregate_by = NULL,
+  use_existing_sf = TRUE,
+  main_covariate,
+  other_covariates = NULL,
+  block = NULL,
+  min_pct = 0,
+  min_fc = 1,
+  use_weights = FALSE,
+  mode = c("pairwise", "average"),
+  use_filterByExpr = FALSE,
+  return_object = FALSE,
+  delim = "_vs_"
+)
+}
+\arguments{
+\item{x}{A SingleCellExperiment}
+
+\item{use_assay}{Name of the assay representing the raw counts}
+
+\item{aggregate_by}{Vector with names of the colData columns to aggregate by. If NULL then
+no aggregation is done prior to DE analysis.}
+
+\item{use_existing_sf}{Logical, whether to use the size factors from \code{sizeFactors(x)} for normalization.
+Only applies if no pseudobulk aggregation is performed. If FALSE and no pseudobulk aggregation is performed then
+normalization is done via the \code{calcNormFactors} function from `edgeR`.
+The default is TRUE as one usually does DE analysis on single-cell level after preprocessing and cluster, so normalization
+usually has already been performed. If TRUE and no size factors can be found will run library size normalization with the
+\code{librarySizeFactors} function from `scuttle`.}
+
+\item{main_covariate}{Name of the main covariate for the testing.}
+
+\item{other_covariates}{Vector with other covariate names to adjust for in DE analysis.}
+
+\item{block}{Name of a factor to block for.}
+
+\item{min_pct}{The minimum percentage of cells in at least one of the two group levels per contrast expressing the gene (expressed means counts above 0).}
+
+\item{min_fc}{Minimum fold change to test against via \code{limma::treat()}.}
+
+\item{use_weights}{Logical, whether to use empirical sample weights in \code{edgeR::voomLmFit}}
+
+\item{mode}{Either "pairwise" to run all pairwise comparisons between `main_covariate` levels,
+or "average" to test each level against the average of all other levels.}
+
+\item{use_filterByExpr}{Logical, whether to run \code{filterByExpr} from `edgeR` for prefiltering of counts.
+This makes sense when testing for example bulk RNA-seq data.}
+
+\item{return_object}{Logical, whether to return the DGEList in the output}
+
+\item{delim}{A delimiter string used in the names of the output list.}
+}
+\description{
+Function accepts a SingleCellExperiment and expects an assay "counts" representing raw counts.
+It then performs DE analysis forming all possible contrasts with limma-voom \(Law et al. 2014\), using the \code{voomLmFit} function from the `edgeR` package.
+It is similar to running \code{voom} followed by \(optionally\) \code{duplicateCorrelation} and \code{lmFit}, for details see the details section in \code{edgeR::voomLmFit}.
+DE testing is done by either contrasting all groupwise combinations, or by testing each group level vs the average of all other group levels.
+The function can optionally aggregate single-cell data into pseudobulks. The function expects one group
+to be the main covariate for testing and allows an arbitrary number of other covariates to be adjusted for. Alternatively, the user can specify a blocking
+factor, for example to account for repeated measures.
+}
+\details{
+The output is a list with DE stats per contrast. All possible contrasts are displayed, for example for groups A/B/C it's A-B, A-C, B-C, B-A, C-A, C-B. The
+individual results for identical contrasts e.g. A-B / B-A can differ if \code{min_pct} is above 0, as each list is filtered that the non-reference expresses
+the gene with more than \code{min_pct} of cells. If {min_pct} is 0 then these two lists would be the same.
+}
+\examples{
+set.seed(1)
+sce <- scuttle::mockSCE(ncells=1000, ngenes=1000, nspikes=0)
+sce$group <- rep(LETTERS[1:4], each=250)
+sce$donor <- rep(LETTERS[1:4], 250)
+res_pairwise <- de_limma(x = sce, aggregate_by = c("group", "donor"),
+                         main_covariate = "group", other_covariates = c("donor"))
+res_pairwise_block <- de_limma(x = sce, aggregate_by = c("group", "donor"),
+                               main_covariate = "group", block = "donor")
+res_average <- de_limma(x = sce, aggregate_by = c("group", "donor"),
+                        main_covariate = "group", other_covariates = c("donor"),
+                        mode="average")
+
+}
+\references{
+Law, CW, Chen, Y, Shi, W, Smyth, GK (2014).
+Voom: precision weights unlock linear model analysis tools for RNA-seq read counts. Genome Biology 15, R29.
+\url{doi:10.1186/gb-2014-15-2-r29}
+}
+\seealso{
+\code{\link{voom}}
+}
+\author{
+Alexander Bender
+}
diff --git a/man/dot-make_contrasts_average.Rd b/man/dot-make_contrasts_average.Rd
new file mode 100644
index 0000000..636e9cd
--- /dev/null
+++ b/man/dot-make_contrasts_average.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{.make_contrasts_average}
+\alias{.make_contrasts_average}
+\title{Makes all averaging contrasts based on an intercept-less design}
+\usage{
+.make_contrasts_average(u, design)
+}
+\description{
+Makes all averaging contrasts based on an intercept-less design
+}
+\keyword{internal}
diff --git a/man/dot-make_contrasts_pairwise.Rd b/man/dot-make_contrasts_pairwise.Rd
new file mode 100644
index 0000000..17ddd5a
--- /dev/null
+++ b/man/dot-make_contrasts_pairwise.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{.make_contrasts_pairwise}
+\alias{.make_contrasts_pairwise}
+\title{Makes all pairwise contrasts based on an intercept-less design}
+\usage{
+.make_contrasts_pairwise(u, design, delim)
+}
+\description{
+Makes all pairwise contrasts based on an intercept-less design
+}
+\keyword{internal}
diff --git a/man/dot-rankmatrix.Rd b/man/dot-rankmatrix.Rd
new file mode 100644
index 0000000..da7f53d
--- /dev/null
+++ b/man/dot-rankmatrix.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{.rankmatrix}
+\alias{.rankmatrix}
+\title{Modified from Kolde et al in \code{RobustRankAggreg::rankMatrix()}}
+\usage{
+.rankmatrix(x)
+}
+\arguments{
+\item{x}{list with ranked elements}
+}
+\description{
+Modified from Kolde et al in \code{RobustRankAggreg::rankMatrix()}
+}
diff --git a/man/get_pexpr.Rd b/man/get_pexpr.Rd
new file mode 100644
index 0000000..f19fc9a
--- /dev/null
+++ b/man/get_pexpr.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{get_pexpr}
+\alias{get_pexpr}
+\title{Calculate percentage of expression per group for each gene.}
+\usage{
+get_pexpr(data, group, threshold = 0, digits = 2)
+}
+\arguments{
+\item{data}{Numeric data.frame, matrix or similar with rows being genes/observations and columns being cells/samples.}
+
+\item{group}{Vector of length \code{ncol(data)} with group information}
+
+\item{threshold}{Consider counts above this threshold as "expressed"}
+
+\item{digits}{Round percentage to this digit}
+}
+\description{
+Calculate percentage of expression per group for each gene.
+}
+\examples{
+ngenes <- 10
+nsamples <- 1000
+set.seed(1)
+data <- matrix(rnbinom(ngenes*nsamples, mu=0.5, nsamples), ncol=nsamples, byrow=TRUE)
+rownames(data) <- paste0("gene", 1:nrow(data))
+group <- rep(LETTERS[1:10], each=nsamples / 10)
+get_pexpr(data, group)
+
+# Also works on Csparse matrix
+data_sparse <- as(data, "CsparseMatrix")
+get_pexpr(data_sparse, group)
+
+}
+\author{
+Alexander Bender
+}
diff --git a/man/rank_degs.Rd b/man/rank_degs.Rd
new file mode 100644
index 0000000..da8f27c
--- /dev/null
+++ b/man/rank_degs.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/signatures.R
+\name{rank_degs}
+\alias{rank_degs}
+\title{Internal function to filter and rank DEG results}
+\usage{
+rank_degs(
+  res,
+  delim,
+  signif_column,
+  signif_threshold,
+  effect_column,
+  effect_threshold,
+  rank_column,
+  rank_method,
+  gene_column,
+  min_pct
+)
+}
+\arguments{
+\item{res}{The output of \code{de_limma} run with \code{mode="pairwise"}}
+
+\item{signif_column}{Name of the significance column in \code{x}.}
+
+\item{signif_threshold}{Threshold for significance.}
+
+\item{effect_column}{Name of the effect column in \code{x}.}
+
+\item{effect_threshold}{Threshold for effect size \(below this is "underexpressed", above is "overexpressed"\)}
+
+\item{rank_column}{Name of the column in \code{x} to use for ranking.}
+
+\item{rank_method}{Either "decreasing" or "increasing" ranking direction}
+
+\item{gene_column}{Name of the gene column in \code{x}.}
+}
+\description{
+Internal function to filter and rank DEG results
+}
+\author{
+Alexander Bender
+}
+\keyword{internal}
diff --git a/man/ranks2signatures.Rd b/man/ranks2signatures.Rd
new file mode 100644
index 0000000..1854f97
--- /dev/null
+++ b/man/ranks2signatures.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/signatures.R
+\name{ranks2signatures}
+\alias{ranks2signatures}
+\title{Convert ranks to signatures}
+\usage{
+ranks2signatures(ranked, min_prop, n, exclude_groups)
+}
+\arguments{
+\item{ranked}{The output of \code{rank_degs}}
+
+\item{min_prop}{A number greater zero and smaller or equal than 1. Defines the stringency of the signature. It is the proportion
+of groups that a gene needs to be overexpressed to. A value of 1 means a gene must be overexpressed versus all other groups. A value
+of 0.75 means overexpressed versus 75\% of other groups.}
+
+\item{n}{The maximum number of top signature genes per group to return}
+
+\item{exclude_groups}{Vector with group names to exclude from signature creation. Can be helpful if a group is expected to be transitional between two other groups
+so one would not expect unique genes defining this group. Can help returning more genes for the remaining groups.}
+}
+\description{
+Convert ranks to signatures
+}
+\author{
+Alexander Bender
+}
+\keyword{internal}
diff --git a/misc/demo.R b/misc/demo.R
new file mode 100644
index 0000000..98c96ce
--- /dev/null
+++ b/misc/demo.R
@@ -0,0 +1,56 @@
+#library(SigGenes)
+
+# Load RNA-seq data for CD4T-, CD8T and naive B cells from Haemopedia:
+#counts <- readRDS(paste0(
+#            system.file("extdata",package="CreateGeneSignatures"),
+#            "/haemopedia_subset.rds"))
+
+counts <- readRDS("inst/extdata/haemopedia_subset.rds")
+
+library(SingleCellExperiment)
+coldata <- DataFrame(group = gsub("\\..", "", colnames(counts)), row.names = colnames(counts), check.names = FALSE)
+sce <- SingleCellExperiment(assays=list(counts = counts), colData = coldata)
+
+# Pairwise Mode
+de <- de_limma(x = sce, use_assay = "counts", aggregate_by = NULL,
+               main_covariate = "group", min_fc = 1.5, use_existing_sf = FALSE, return_object = TRUE,
+               other_covariates = NULL, block = NULL,
+               min_pct = 0, use_weights = FALSE,
+               mode = "pairwise", delim = "_vs_",
+               use_filterByExpr = FALSE)
+
+signatures <- create_signatures(de, min_prop = 1, n = Inf)
+
+lengths(signatures)
+
+library(pheatmap)
+logcpm <- log2(edgeR::cpm(de$DGEList,log=FALSE)+1)
+
+col_order <- unlist(lapply(names(signatures), function(x) which(de$DGEList$samples$group %in% x)))
+
+logcpmZ <- t(scale(t(logcpm[unique(unlist(signatures)),])))
+pheatmap(mat=logcpmZ[,col_order],
+         show_rownames=FALSE, cluster_rows=FALSE, cluster_cols=FALSE)
+
+# Average Mode
+de2 <- de_voom(x = sce, use_assay = "counts", aggregate_by = NULL,
+               main_covariate = "group", min_fc = 1.5, use_existing_sf = FALSE, return_object = FALSE,
+               other_covariates = NULL, block = NULL,
+               min_pct = 0, use_weights = FALSE,
+               mode = "average", delim = "_vs_",
+               use_filterByExpr = FALSE)
+
+signatures2 <- sapply(de2, function(x){
+
+  xy <- x[x$logFC > 0 & x$adj.P.Val < 0.05,]
+  xy <- xy[order(xy$t, decreasing = TRUE),]
+  head(rownames(xy), 100)
+
+}, simplify = FALSE)
+
+lengths(signatures2)
+
+logcpmZ2 <- t(scale(t(logcpm[unique(unlist(signatures2)),])))
+
+pheatmap(mat=logcpmZ2[,col_order],
+         show_rownames=FALSE, cluster_rows=FALSE, cluster_cols=FALSE)
diff --git a/tests/testthat/all_tests.R b/tests/testthat/all_tests.R
new file mode 100644
index 0000000..05e3484
--- /dev/null
+++ b/tests/testthat/all_tests.R
@@ -0,0 +1,83 @@
+# testthat::test_file("tests/testthat/all_tests.R")
+
+test_that("de_testing works", {
+
+  set.seed(1)
+  sce <- scuttle::mockSCE(ncells=100, ngenes=11, nspikes=0)
+  sce$group <- rep(LETTERS[1:4], each=25)
+  sce$donor <- rep(LETTERS[1:4], 25)
+
+  res1 <- de_limma(x = sce, aggregate_by = c("group", "donor"), main_covariate = "group", other_covariates = c("donor"), return_object = TRUE)
+  expect_true(is(res1, "SimpleList"))
+  expect_true(is(res1$results, "SimpleList"))
+  expect_true(is(res1$results$A_vs_B, "data.frame"))
+  expect_true(is(res1$DGEList, "DGEList"))
+  expect_true(is(res1$params, "list"))
+  rm(res1)
+
+  res2 <- de_limma(x = sce, aggregate_by = c("group", "donor"), main_covariate = "group", other_covariates = c("donor"), return_object = TRUE, mode = "average")
+  expect_true(is(res2, "SimpleList"))
+  expect_true(is(res2$results, "SimpleList"))
+  expect_true(is(res2$results$A, "data.frame"))
+  expect_true(is(res2$DGEList, "DGEList"))
+  expect_true(is(res2$params, "list"))
+  rm(res2)
+
+  expect_error(de_limma(x = sce, main_covariate = "group", use_assay = "not_existing"))
+  expect_error(de_limma(x = sce, main_covariate = "group", block = "not_existing"))
+  expect_error(de_limma(x = sce, other_covariates = "g"))
+  expect_error(de_limma(x = sce, aggregate_by = "g"))
+  expect_error(de_limma(x = assay(sce), aggregate_by = "g"))
+  expect_error(de_limma(x = assay(sce), min_pct = -1))
+  expect_error(de_limma(x = assay(sce), min_fc = -1))
+  expect_error(de_limma(x = assay(sce), mode = "3"))
+
+})
+
+test_that("create_signatures works", {
+
+  set.seed(1)
+  sce <- scuttle::mockSCE(ncells=100, ngenes=11, nspikes=0)
+  sce$group <- rep(LETTERS[1:4], each=25)
+  sce$donor <- rep(LETTERS[1:4], 25)
+
+  res1 <- de_limma(x = sce, aggregate_by = c("group", "donor"), main_covariate = "group", other_covariates = c("donor"), return_object = TRUE)
+  sig <- create_signatures(res1, signif_threshold = .9)
+  expect_true(is(sig, "list"))
+
+  res_backup <- res1
+  res1$params <- NULL
+  expect_error(create_signatures(res1))
+
+  res1 <- res_backup
+  res1$results <- NULL
+  expect_error(create_signatures(res1))
+
+  res1 <- res_backup
+  res1$params$min_pct <- NULL
+  expect_error(create_signatures(res1))
+
+  cn <- colnames(res1$results$A_vs_B)
+  res1 <- res_backup
+  colnames(res1$results$A_vs_B)[which(cn=="logFC")] <- "x"
+  expect_error(create_signatures(res1))
+
+  res1 <- res_backup
+  colnames(res1$results$A_vs_B)[which(cn=="adj.P.Val")] <- "x"
+  expect_error(create_signatures(res1))
+
+  res1 <- res_backup
+  colnames(res1$results$A_vs_B)[which(cn=="P.Value")] <- "x"
+  expect_error(create_signatures(res1))
+
+  res1 <- res_backup
+  colnames(res1$results$A_vs_B)[which(cn=="Gene")] <- "x"
+  expect_error(create_signatures(res1))
+
+  expect_error(create_signatures(res_backup, min_prop = 2))
+  expect_error(create_signatures(res_backup, n = -1))
+  expect_error(create_signatures(res_backup, exclude_groups = "not_existing"))
+
+})
+
+