-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_sce.R
70 lines (66 loc) · 2.63 KB
/
create_sce.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
library(SingleCellExperiment)
# BiocManager::install("scater")
# BiocManager::install("biomaRt")
library(biomaRt)
library(scater)
create_sce_from_counts <- function(counts, colData, rowData = NULL) {
if(is.null(rowData)) {
sceset <- SingleCellExperiment(assays = list(counts = as.matrix(counts)),
colData = colData)
} else {
sceset <- SingleCellExperiment(assays = list(counts = as.matrix(counts)),
colData = colData,
rowData = rowData)
}
# this function writes to logcounts slot
exprs(sceset) <- log2(calculateCPM(sceset, use.size.factors = FALSE) + 1)
# use gene names as feature symbols
rowData(sceset)$feature_symbol <- rownames(sceset)
# remove features with duplicated names
if(is.null(rowData)) {
sceset <- sceset[!duplicated(rowData(sceset)$feature_symbol), ]
}
# QC
isSpike(sceset, "ERCC") <- grepl("^ERCC-", rownames(sceset))
sceset <- calculateQCMetrics(sceset, feature_controls = list("ERCC" = isSpike(sceset, "ERCC")))
return(sceset)
}
create_sce_from_normcounts <- function(normcounts, colData, rowData = NULL) {
if(is.null(rowData)) {
sceset <- SingleCellExperiment(assays = list(normcounts = as.matrix(normcounts)),
colData = colData)
} else {
sceset <- SingleCellExperiment(assays = list(normcounts = as.matrix(normcounts)),
colData = colData,
rowData = rowData)
}
logcounts(sceset) <- log2(normcounts(sceset) + 1)
# use gene names as feature symbols
rowData(sceset)$feature_symbol <- rownames(sceset)
# remove features with duplicated names
if(is.null(rowData)) {
sceset <- sceset[!duplicated(rowData(sceset)$feature_symbol), ]
}
# QC
isSpike(sceset, "ERCC") <- grepl("^ERCC-", rownames(sceset))
return(sceset)
}
create_sce_from_logcounts <- function(logcounts, colData, rowData = NULL) {
if(is.null(rowData)) {
sceset <- SingleCellExperiment(assays = list(logcounts = as.matrix(logcounts)),
colData = colData)
} else {
sceset <- SingleCellExperiment(assays = list(logcounts = as.matrix(logcounts)),
colData = colData,
rowData = rowData)
}
# use gene names as feature symbols
rowData(sceset)$feature_symbol <- rownames(sceset)
# remove features with duplicated names
if(is.null(rowData)) {
sceset <- sceset[!duplicated(rowData(sceset)$feature_symbol), ]
}
# QC
isSpike(sceset, "ERCC") <- grepl("^ERCC-", rownames(sceset))
return(sceset)
}