This repository has been archived by the owner on Apr 8, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathatrial_cell_cluster_isolation.R
168 lines (145 loc) · 4.87 KB
/
atrial_cell_cluster_isolation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
library(bcbioSingleCell)
library(tidyverse)
dataDir <- file.path("data", Sys.Date())
load("data/2017-12-01/bcb_pool_filtered.rda")
load("data/2017-12-12/seurat.rda")
# Replot the tSNE
plotTSNE(seurat, pointSize = 0.5, pointAlpha = 0.8)
# Check that the dimensions match ==============================================
identical(
dim(bcb_pool_filtered),
dim(seurat@data)
)
# TRUE
identical(
colnames(bcb_pool_filtered),
colnames(seurat@data)
)
# TRUE
# Note that Seurat uses the gene symbols, with the original Ensembl ids stashed
# as the names of the rownames
identical(
rownames(bcb_pool_filtered),
names(rownames(seurat@data))
)
# TRUE
# Import incoming markers (2018-01-30 list) ====================================
library(basejump)
library(readxl)
xlsx_file <- file.path("meta", "markers_2018-01-30.xlsx")
sheets <- excel_sheets(xlsx_file)
# [1] "cardiomyocyte" "left_right_pattern" "epicardial_lineage"
# [4] "retinoic_acid_pathway" "sheer_stress" "atp_electron_transport"
# [7] "bulk_rnaseq_de"
new_markers <- lapply(
X = sheets,
FUN = function(sheet) {
read_xlsx(xlsx_file, sheet = sheet) %>%
pull(symbol)
}
)
names(new_markers) <- sheets
saveData(new_markers, dir = dataDir)
# Now we need to check that all of these marker symbols map to Ensembl gene
# identifiers, and are present in our dataset. For missing symbols, let's refer
# to our list of synonyms downloaded from Ensembl, saved in the basejump
# package.
# Here `annotable` contains the information for genes we detected, and
# `full_annotable` contains all genes from Ensembl
annotable <- annotable(bcb_pool_filtered)
full_annotable <- metadata(bcb_pool_filtered)$annotable
synonyms <- basejump::synonyms$homoSapiens
saveData(full_annotable, synonyms, dir = dataDir)
seurat_gene_ids <- rownames(seurat@data)
undetected_markers <- lapply(
X = new_markers,
FUN = function(x) {
x[which(!x %in% seurat_gene_ids)]
}
)
# 2018-01-31 unmapped symbols we need to update
# $cardiomyocyte
# [1] "CTNT" "NKX2.5" "SCN5a" "VIMP"
#
# $left_right_pattern
# [1] "Pitx2c" "nodal" "Lefty-1" "Lefty-2" "NKX3.2" "FGF-8"
#
# $epicardial_lineage
# [1] "WT-1" "ZO1" "SNAIL2"
#
# $retinoic_acid_pathway
# [1] "ADH1A" "CYP26C1" "HOXB1" "WNT8a" "WNT5a"
#
# $sheer_stress
# [1] "IP3R"
#
# $atp_electron_transport
# character(0)
#
# $bulk_rnaseq_de
# [1] "PWAR6" "PWAR5" "mir34-a" "CACNA1S" "GABRR1" "KCNK5" "CLCN1" "mmel1"
saveData(undetected_markers, dir = dataDir)
# Ensure all of the undetected markers symbols map correctly to the Ensembl
# gene identifiers, but that they're just not expressed.
lapply(
X = undetected_markers,
FUN = function(x) {
all(x %in% full_annotable[, "symbol"])
})
new_markers_detected <- lapply(
new_markers,
FUN = function(x) {
# Keep the input order Zaniar used in the original spreadsheet
x[which(x %in% seurat_gene_ids)]
}
)
saveData(new_markers_detected, dir = dataDir)
# New marker plots =============================================================
plotsDir <- file.path("results", "marker_plots", Sys.Date())
dir.create(plotsDir, recursive = TRUE)
# Loop across the aggregate markers and export in PNG format
lapply(
X = names(new_markers_detected),
FUN = function(name) {
png(file.path(plotsDir, paste0(name, ".png")), width = 1000, height = 1000)
p <- plotMarkerTSNE(seurat, genes = new_markers_detected[[name]])
# Have to use `print` inside an `lapply()` call
print(p)
dev.off()
}
)
gene2symbol <- gene2symbol(bcb_pool_filtered)
markers <- readCellTypeMarkersFile(
"meta/cellTypeMarkers.csv",
gene2symbol = gene2symbol)
atrial_markers <- markers %>%
filter(cell == "Atrial") %>%
pull("symbol")
atrial_markers
# "HEPACAM" "MYH6" "MYL4" "NKX2-5" "NPPA" "SCN5A" "TNNT2"
counts <- seurat@data
# Get the summaries for each marker gene
lapply(seq_along(atrial_markers), function(a) {
gene <- atrial_markers[[a]]
summary(counts[gene, , drop = TRUE])
}) %>%
set_names(atrial_markers)
# First, let's see how these genes are marking
plotMarkerTSNE(seurat, genes = atrial_markers, title = "Atrial markers")
# Let's look at the normalized expression data from Seurat
data <- FetchData(seurat, vars.all = atrial_markers)
data <- bcbioSingleCell:::.fetchGeneData.seurat(seurat, genes = atrial_markers)
geomean_x <- Matrix::colMeans(t(data))
head(geomean)
geomean <- colMeans(t(data))
geomean_2 <- rowMeans(data)
# Now, let's fetch the expression data in a more useful format. This builds
# upon the `Seurat::FetchData()` function but summarizes the counts into
# an `expression` column, and also calculates the `geomean`
data2 <- fetchTSNEExpressionData(seurat, genes = atrial_markers)
head(data2$geomean)
assignAndSaveData(
name = "atrial_marker_expression_data",
object = data,
dir = dataDir
)