Tempora is a novel cell trajectory inference method that orders cells using time information from time-series scRNAseq data. Tempora uses biological pathway information to help identify cell type relationships and can identify important time-dependent pathways to help interpret the inferred trajectory.
SCORPIUS is an unsupervised approach for inferring linear developmental chronologies from single-cell RNA sequencing data.
The mapping of a principal graph into multidimensional data space is regularized by minimizing the stretching of the graph edges and the deviation from harmonicity for the graph stars.
CelliD is a robust statistical method that performs gene signature extraction and functional annotation for each individual cell in a single-cell RNA-seq dataset.
Unsupervised clustering of genes.
scDEA applies ensemble learning for single-cell differential expression analysis on single-cell RNA-seq datasets.
This is actually implemented in the main app, but it is only available if scDEA is installed. Since it is computationally very heavy, it is documented only here.
# Read the filtered 10X H5 file of every sample and combine the per-sample
# results column-wise into one object (scexSeurat).
seuratList <- list() # list of count objects coming from the modified Seurat H5 reader
sampleNames <- c("Buffercells", "GFPcells")
# PROJECTFOLDER = 10X project folder that holds the 'counts' directory
# we are reading filtered H5 files.
# Iterate over all entries of sampleNames instead of a hard-coded 1:2 so that
# adding or removing a sample only requires editing sampleNames.
for (sIdx in seq_along(sampleNames)) {
  h5file <- paste0("PROJECTFOLDER/counts/", sampleNames[sIdx], "/outs/filtered_feature_bc_matrix.h5")
  # myRead10X_h5 is defined outside this section.
  # NOTE(review): presumably it returns a genes x cells count matrix - confirm.
  seuratList[[sIdx]] <- myRead10X_h5(h5file, use.names = TRUE, unique.features = TRUE, sampleName = sampleNames[sIdx])
}
# Start from the first sample and append the remaining ones as extra columns.
scexSeurat <- seuratList[[1]]
# seq_along(...)[-1] is empty when there is only one sample, so the loop is
# skipped in that case.
for (sIdx in seq_along(seuratList)[-1]) {
  scexSeurat <- cbind(scexSeurat, seuratList[[sIdx]])
}
# Keep only the genes (rows) whose total count over all cells is positive.
scexSeurat <- scexSeurat[rowSums(as.matrix(scexSeurat)) > 0, ]
# Convert to a dense matrix and drop duplicated rows, so that no two genes
# share an identical expression profile. Such duplicates would cause problems
# for some calculations.
scexSeurat <- unique(as.matrix(scexSeurat))
# Assemble the SingleCellExperiment object from the count matrix, the per-cell
# table (pd) and the per-gene table (featuredata).
# NOTE(review): pd and featuredata are built further down in this file; they
# must exist before this statement is evaluated - confirm the execution order.
scEx <- SingleCellExperiment(
  # spelled out as `assays` (the documented argument name) rather than relying
  # on partial matching of `assay`
  assays = list(counts = scexSeurat),
  colData = pd,
  rowData = featuredata
)
# Gene annotation table with one row per entry of my_gene (my_gene is created
# outside this section). Description is left empty (NA) here.
featureData_summary <- data.frame(
  "Description" = NA,
  "gene_id" = my_gene$gene_id,
  "Chromosome.Name" = my_gene$seqid,
  "Associated.Gene.Name" = my_gene$gene_name,
  # FALSE instead of F: F is an ordinary variable that can be reassigned
  stringsAsFactors = FALSE
)
# Build featuredata: the per-gene annotation table, keyed by a unique gene
# symbol, with one row for every row of scexSeurat and in the same order.
# featureData_summary <- featureData_summary[, -2]
featuredata <- data.frame(featureData_summary, stringsAsFactors = FALSE)
featuredata$id <- featuredata$Associated.Gene.Name
# make.unique() appends .1, .2, ... to repeated gene names
featuredata$symbol <- make.unique(featuredata$Associated.Gene.Name)
# keep only annotated genes that are present in the count matrix
featuredata <- featuredata[which(featuredata$symbol %in% rownames(scexSeurat)), ]
featuredata <- unique(featuredata)
# printed for inspection: rows whose symbol is still duplicated
featuredata[which(duplicated(featuredata$symbol)), ]
# rownames(featuredata) = 1:nrow(featuredata)
# featureDatatmp <- featuredata[, -2]
# featureDatatmp <- unique(featureDatatmp)
#same symbol, different ensg numbers:
# featuredata = featuredata[rownames(featureDatatmp), ]
rownames(featuredata) <- featuredata$symbol
nrFD <- nrow(featuredata)
# Genes in the count matrix that have no annotation row yet. The comparison
# uses `symbol` (the row key) rather than `Associated.Gene.Name`: a name made
# unique with a suffix would otherwise be appended a second time and clash
# with the existing row name.
newRowNames <- unique(rownames(scexSeurat)[!rownames(scexSeurat) %in% featuredata$symbol])
if (length(newRowNames) > 0) {
  newIdx <- (nrFD + 1):(nrFD + length(newRowNames))
  # append empty rows, then fill in the identifying columns
  featuredata[newIdx, ] <- NA
  # Assign the names on featuredata itself. The previous form,
  # rownames(featuredata[idx, ]) <- ..., never renamed rows of featuredata, so
  # the lookup by row name below returned NA rows for these genes.
  rownames(featuredata)[newIdx] <- newRowNames
  featuredata[newIdx, "symbol"] <- newRowNames
  featuredata[newIdx, "Description"] <- newRowNames
  featuredata[newIdx, "Associated.Gene.Name"] <- newRowNames
  featuredata[newIdx, "id"] <- newRowNames
}
# printed for inspection: count-matrix genes whose name is not an annotated gene name
rownames(scexSeurat)[which(!rownames(scexSeurat) %in% featuredata$Associated.Gene.Name)]
# printed for inspection: annotated gene names that are not row names of the
# count matrix (indexes featuredata, where the positions were computed)
featuredata$Associated.Gene.Name[which(!featuredata$Associated.Gene.Name %in% rownames(scexSeurat))]
# scexSeurat <- scexSeurat[featuredata$Associated.Gene.Name, ]
# reorder the annotation rows to match the row order of the count matrix
featuredata <- featuredata[rownames(scexSeurat), ]
# featuredata$Description = featureData_summary93[featuredata$gene_id,"Description"]
# Per-cell table derived from the column names of the count matrix.
# Both patterns are greedy, so each column name is split at its LAST hyphen:
# barcode is the part before it, sampleNames the part after it.
pd <- data.frame(
  barcode = sub("(.*)-(.*)", "\\1", colnames(scexSeurat)),
  sampleNames = sub(".*-(.*)", "\\1", colnames(scexSeurat))
)
pd$barcode <- as.character(pd$barcode)
# same keys as the columns of scexSeurat
rownames(pd) <- colnames(scexSeurat)
# Cell-cycle phase assignment with cyclone(); the results are stored as
# per-cell columns in pd.
# Despite its name, hs.pairs is read from the mouse marker file.
hs.pairs <- readRDS("mouse_cycle_markers.rds")
# Translate gene symbols to Ensembl ids for cyclone().
# NOTE(review): the annotation database argument was missing in this call;
# org.Mm.eg.db is assumed because the marker file is the mouse set - confirm.
ensembl <- mapIds(org.Mm.eg.db::org.Mm.eg.db, keys = rownames(featuredata), keytype = "SYMBOL", column = "ENSEMBL")
assignments <- cyclone(scexSeurat, hs.pairs, gene.names = ensembl, BPPARAM = MulticoreParam())
pd$phases <- assignments$phases
pd$G1score <- assignments$normalized.scores$G1
pd$Sscore <- assignments$normalized.scores$S
pd$G2Mscore <- assignments$normalized.scores$G2M
# Replace missing values by the string "NA". Any column this touches becomes
# character.
# NOTE(review): the index expressions were garbled; is.na(pd$...) is the
# assumed original - confirm.
pd$phases[is.na(pd$phases)] <- "NA"
pd$G1score[is.na(pd$G1score)] <- "NA"
pd$Sscore[is.na(pd$Sscore)] <- "NA"
pd$G2Mscore[is.na(pd$G2Mscore)] <- "NA"
# scExTemp <- applySingleR(scEx, DatabaseImmuneCellExpressionData(), "cellTypes")
# colData(scEx)$cellTypes = as.factor(colData(scExTemp)$cellTypes)
# Store the scEx object in "<fname>.RData".
fname <- "Ferdinand-scRNAseq"
outfile <- paste0(fname, ".RData")
save(list = "scEx", file = outfile)
# set.seed(1)
# colIdx <- sample(1:ncol(scexSeurat), 2000, replace = FALSE)
# scEx <- SingleCellExperiment(
# assay = list(counts = scexSeurat[, colIdx]),
# colData = pd[colIdx, ],
# rowData = featuredata
# )
# outfile <- paste0(fname, ".sml.RData")
# save(file = outfile, list = c("scEx"))
message = FALSE,
warning = FALSE,
echo = FALSE,
include = TRUE
output_file = paste0(fname, ".report.html"),
output_format = "html_document",
params = list(
fileN = paste0(fname, ".RData"),
min.genes = 2,
min.cells = 3,
low.thres1 = 2,
low.thres2 = -Inf,
high.thres1 = 2500,
high.thres2 = 2000