-
Notifications
You must be signed in to change notification settings - Fork 9
/
10x_sACC-n5_step04_comparison-publishedData_MNT.R
282 lines (234 loc) · 13 KB
/
10x_sACC-n5_step04_comparison-publishedData_MNT.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
### MNT 10x snRNA-seq workflow: step 04
### **Region-specific analyses**
### - (5x) sACC samples (M & F donors)
### - Comparison to Velmeshev, et al (Science 2019)
#####################################################################
library(SingleCellExperiment)
library(EnsDb.Hsapiens.v86)
library(scater)
library(scran)
library(batchelor)
library(DropletUtils)
library(jaffelab)
library(limma)
library(pheatmap)
library(RColorBrewer)
### Colour palettes taken from `scater`: a 10-colour and a 20-colour Tableau set.
### (Neither vector is referenced again in the part of the script visible here.)
tableau10medium <- c(
  "#729ECE", "#FF9E4A", "#67BF5C", "#ED665D", "#AD8BC9",
  "#A8786E", "#ED97CA", "#A2A2A2", "#CDCC5D", "#6DCCDA"
)
tableau20 <- c(
  "#1F77B4", "#AEC7E8", "#FF7F0E", "#FFBB78",
  "#2CA02C", "#98DF8A", "#D62728", "#FF9896",
  "#9467BD", "#C5B0D5", "#8C564B", "#C49C94",
  "#E377C2", "#F7B6D2", "#7F7F7F", "#C7C7C7",
  "#BCBD22", "#DBDB8D", "#17BECF", "#9EDAE5"
)
# ===
### Comparison to Velmeshev, et al (PFC & ACC) ========

## Within-region marker statistics for the Velmeshev et al. data.
#  Objects expected from this file: markers.asdVelm.t.pfc, markers.asdVelm.t.acc
load(
  "/dcl01/ajaffe/data/lab/singleCell/velmeshev2019/analysis_MNT/markers-stats_velmeshev-et-al_ASD-cortex-withinRegion_findMarkers-SN-LEVEL_MNTAug2020.rda",
  verbose = TRUE
)
#rm(markers.asdVelm.t.pfc)

## SingleCellExperiment object(s) for the same dataset.
#  Objects expected from this file: sce.asd, sce.asd.pfc, sce.asd.acc
load(
  "/dcl01/ajaffe/data/lab/singleCell/velmeshev2019/analysis_MNT/SCE_asd-velmeshev-etal_MNT.rda",
  verbose = TRUE
)

sce.asd
# class: SingleCellExperiment
# dim: 36501 104559
# metadata(0):
# assays(1): logcounts
# rownames(36501): ENSG00000227232 ENSG00000243485 ... ENSG00000210195
# ENSG00000210196
# rowData names(0):
# colnames(104559): AAACCTGGTACGCACC-1_1823_BA24
# AAACGGGCACCAGATT-1_1823_BA24 ... TTTGTCATCCCAAGTA-1_6033_BA9
# TTTGTCATCGTTACGA-1_6033_BA9
# colData names(17): cell cluster ... BAregion contrast
# reducedDimNames(1): TSNE.prov
# altExpNames(0):

## Split the nuclei by the `region` column. These assignments replace any
## sce.asd.acc / sce.asd.pfc that were loaded from the .rda above; the combined
## object is removed afterwards since only the per-region ones are used below.
sce.asd.acc <- sce.asd[, sce.asd$region == "ACC"]
sce.asd.pfc <- sce.asd[, sce.asd$region == "PFC"]
rm(sce.asd)
# The LIBD sACC object (sce.sacc) has gene symbols as row names, so results
# keyed on it are converted to Ensembl IDs further down; its number of nuclei
# is also what scales the t-statistics computed below.
load("rdas/revision/regionSpecific_sACC-n5_cleaned-combined_SCE_MNT2021.rda", verbose = TRUE)
# sce.sacc, chosen.hvgs.sacc, pc.choice.sacc, clusterRefTab.sacc, ref.sampleInfo, annotationTab.sacc, cell_colors.sacc
sce.sacc
# class: SingleCellExperiment
# dim: 33538 15669
# metadata(2): merge.info pca.info
# assays(2): counts logcounts
# rownames(33538): MIR1302-2HG FAM138A ... AC213203.1 FAM231C
# rowData names(6): gene_id gene_version ... gene_biotype Symbol.uniq
# colnames(15669): AAACCCAAGAGTCTTC-1 AAACCCAAGGCCCGTT-1 ...
# TTTGACTGTATCGTGT-1 TTTGGTTAGCAGCACA-1
# colData names(20): Sample Barcode ... collapsedCluster cellType
# reducedDimNames(4): PCA_corrected PCA_opt TSNE UMAP
# altExpNames(0):
table(sce.sacc$cellType)
# Astro_A Astro_B drop.doublet drop.lowNTx Excit_A Excit_B
# 747 160 28 298 856 575
# Excit_C Excit_D Excit_E Excit_F Excit_G Inhib_A
# 1735 311 428 228 30 842
# Inhib_B Inhib_C Inhib_D Inhib_E Inhib_F Inhib_G
# 912 465 384 330 521 206
# Inhib_H Inhib_I Inhib_J Inhib_K Micro Neu_FAT2.CDH15
# 208 39 42 25 784 20
# Oligo_A Oligo_B OPC
# 4389 195 911

# Drop the flagged "drop." clusters.
# A logical mask is used instead of `-grep(...)`: with a negative integer
# index, zero matches would give `-integer(0)`, which selects NO columns
# rather than all of them. `fixed = TRUE` makes the "." a literal dot
# instead of a regex wildcard.
sce.sacc <- sce.sacc[, !grepl("drop.", sce.sacc$cellType, fixed = TRUE)]
# Remove the now-empty "drop.*" factor levels
sce.sacc$cellType <- droplevels(sce.sacc$cellType)
## Load LIBD sACC stats (just need the "1vAll" result)
load("rdas/revision/markers-stats_sACC-n5_findMarkers-SN-LEVEL_MNT2021.rda", verbose = TRUE)
# markers.sacc.t.pw, markers.sacc.wilcox.block, markers.sacc.t.1vAll, medianNon0.sacc

# Free the results that are not needed here. Only names that actually exist
# are handed to rm(): `markers.sacc.binom.block` is not among the objects
# listed for this .rda, and rm() warns for every name it cannot find.
rm(list = intersect(
  c("markers.sacc.t.pw", "markers.sacc.wilcox.block", "markers.sacc.binom.block"),
  ls()
))

# Keep the second element of each per-cluster entry
# (presumably the 'enriched' direction of the 1-vs-all test -- TODO confirm).
markers.sacc.enriched <- lapply(markers.sacc.t.1vAll, function(x) x[[2]])

# Re-key every table from the row names used in sce.sacc (gene symbols, per
# the object summary above) to the Ensembl `gene_id` held in its rowData, so
# that the genes can be matched against the Ensembl-keyed Velmeshev tables.
markers.sacc.enriched <- lapply(markers.sacc.enriched, function(stats) {
  rownames(stats) <- rowData(sce.sacc)$gene_id[match(rownames(stats), rownames(sce.sacc))]
  stats
})

## Calculate and add t-statistic (= std.logFC * sqrt(N)) from contrasts
#    and fix row order to that of the first entry, "Astro_A".
#  N is the number of nuclei in sce.sacc at this point in the script.
#  NOTE(review): whether the marker statistics were computed on exactly this
#  set of nuclei cannot be verified from this script -- confirm.
fixTo <- rownames(markers.sacc.enriched[["Astro_A"]])
markers.sacc.enriched <- lapply(markers.sacc.enriched, function(stats) {
  stats$t.stat <- stats$std.logFC * sqrt(ncol(sce.sacc))
  stats[fixTo, ]
})

# Pull out the t's: genes x clusters matrix. vapply() (rather than sapply())
# guarantees a numeric matrix with one row per gene in `fixTo`, and fails
# loudly if any cluster table yields something else.
ts.sacc <- vapply(
  markers.sacc.enriched,
  function(stats) stats$t.stat,
  numeric(length(fixTo))
)
rownames(ts.sacc) <- fixTo
## Same procedure for the Velmeshev et al. ACC statistics. Gene order is pinned
## to that of the first cluster table, "AST-FB".
fixTo <- rownames(markers.asdVelm.t.acc[["AST-FB"]])
for (s in names(markers.asdVelm.t.acc)) {
  # t = std.logFC * sqrt(N), with N = number of nuclei in the ACC subset
  markers.asdVelm.t.acc[[s]]$t.stat <-
    sqrt(ncol(sce.asd.acc)) * markers.asdVelm.t.acc[[s]]$std.logFC
  # Put every table into the common gene order
  markers.asdVelm.t.acc[[s]] <- markers.asdVelm.t.acc[[s]][fixTo, ]
}

# Collect the t's into a genes x clusters matrix
ts.velmeshev.acc <- sapply(markers.asdVelm.t.acc, function(tab) tab[["t.stat"]])
rownames(ts.velmeshev.acc) <- fixTo
## Restrict both t-statistic matrices to the genes they have in common (same
## row order in each), then correlate every LIBD sACC cluster (rows of the
## result) with every Velmeshev ACC cluster (columns of the result).
sharedGenes <- intersect(rownames(ts.velmeshev.acc), rownames(ts.sacc))
length(sharedGenes) # 27,442

ts.sacc <- ts.sacc[sharedGenes, ]
ts.velmeshev.acc <- ts.velmeshev.acc[sharedGenes, ]

cor_t_sacc <- cor(ts.sacc, ts.velmeshev.acc)
# Tag each axis with the dataset it came from
rownames(cor_t_sacc) <- paste(rownames(cor_t_sacc), "libd", sep = "_")
colnames(cor_t_sacc) <- paste(colnames(cor_t_sacc), "asd.acc", sep = "_")
range(cor_t_sacc)
## Heatmap of the LIBD sACC vs. Velmeshev ACC correlations
# Colour breaks every 0.01 across [-0.95, 0.95]; one colour per interval,
# hence one fewer colour than breaks.
theSeq.all <- seq(-0.95, 0.95, by = 0.01)
my.col.all <- colorRampPalette(brewer.pal(7, "BrBG"))(length(theSeq.all) - 1)

pdf("pdfs/exploration/Velmeshev-ASD_pfc-acc/overlap-velmeshev-ASD-acc_with_LIBD-10x-sACC_Aug2020.pdf")
pheatmap(
  cor_t_sacc,
  color = my.col.all,
  breaks = theSeq.all,
  # Keep clusters in their given order (no hierarchical clustering)
  cluster_rows = FALSE, cluster_cols = FALSE,
  fontsize = 10, fontsize_row = 11, fontsize_col = 11,
  # Print each correlation, to two decimals, inside its cell
  display_numbers = TRUE, number_format = "%.2f", fontsize_number = 6.0,
  legend_breaks = seq(-0.95, 0.95, by = 0.475),
  main = "Correlation of cluster-specific t's between LIBD sACC to \n ACC from (Velmeshev et al. Science 2019)"
)
dev.off()
### What if compared between both the .acc set of stats vs the .pfc?? =============

## Set up PFC t's, exactly as for ACC: t = std.logFC * sqrt(N) with N the
## number of nuclei in the PFC subset, rows in the gene order of "AST-FB".
fixTo <- rownames(markers.asdVelm.t.pfc[["AST-FB"]])
for (s in names(markers.asdVelm.t.pfc)) {
  markers.asdVelm.t.pfc[[s]]$t.stat <-
    sqrt(ncol(sce.asd.pfc)) * markers.asdVelm.t.pfc[[s]]$std.logFC
  markers.asdVelm.t.pfc[[s]] <- markers.asdVelm.t.pfc[[s]][fixTo, ]
}

# Collect the t's into a genes x clusters matrix
ts.velmeshev.pfc <- sapply(markers.asdVelm.t.pfc, function(tab) tab[["t.stat"]])
rownames(ts.velmeshev.pfc) <- fixTo

## Genes present in all three t-statistic matrices
sharedGenes.all <- intersect(
  intersect(rownames(ts.velmeshev.acc), rownames(ts.sacc)),
  rownames(ts.velmeshev.pfc)
)
# Originally recorded as: of length 27,890
# NOTE(review): ts.sacc and ts.velmeshev.acc were already cut down to
# `sharedGenes` (recorded as 27,442) earlier in this script, so this
# intersection cannot be longer than that -- the recorded figure looks stale;
# re-check.

# Subset/order all three matrices identically
ts.velmeshev.acc <- ts.velmeshev.acc[sharedGenes.all, ]
ts.velmeshev.pfc <- ts.velmeshev.pfc[sharedGenes.all, ]
ts.sacc <- ts.sacc[sharedGenes.all, ]

# Suffix the Velmeshev cluster names with their region, then place the PFC
# columns first and the ACC columns after them.
colnames(ts.velmeshev.acc) <- paste0(colnames(ts.velmeshev.acc), "_acc")
colnames(ts.velmeshev.pfc) <- paste0(colnames(ts.velmeshev.pfc), "_pfc")
ts.velmeshev.full <- cbind(ts.velmeshev.pfc, ts.velmeshev.acc)

# LIBD sACC clusters (rows) vs. all Velmeshev PFC + ACC clusters (columns)
cor_t_sacc.asd <- cor(ts.sacc, ts.velmeshev.full)
range(cor_t_sacc.asd)
## Heatmap of LIBD sACC vs. Velmeshev PFC + ACC correlations
# Column annotation: the region suffix, i.e. the 2nd "_"-separated field of
# each column name, taken with ss(); one row per heatmap column.
regionInfo <- data.frame(
  region = ss(colnames(ts.velmeshev.full), "_", 2),
  row.names = colnames(ts.velmeshev.full)
)

# Re-name that "Neu_FAT2.CDH15" to "Neu_ambig", like for Fig 3
rownames(cor_t_sacc.asd)[rownames(cor_t_sacc.asd) == "Neu_FAT2.CDH15"] <- "Neu_ambig"

# Colour breaks every 0.01 across [-0.95, 0.95]; one colour per interval
theSeq.all <- seq(-0.95, 0.95, by = 0.01)
my.col.all <- colorRampPalette(brewer.pal(7, "BrBG"))(length(theSeq.all) - 1)

pdf("pdfs/revision/exploration/overlap-velmeshev-ASD-bothRegions_with_LIBD-10x-sACC_MNT2021.pdf", width = 10)
pheatmap(
  cor_t_sacc.asd,
  color = my.col.all,
  breaks = theSeq.all,
  annotation_col = regionInfo,
  # Keep clusters in their given order (no hierarchical clustering)
  cluster_rows = FALSE, cluster_cols = FALSE,
  fontsize = 10.5, fontsize_row = 11, fontsize_col = 10,
  # Print each correlation, to two decimals, inside its cell
  display_numbers = TRUE, number_format = "%.2f", fontsize_number = 4.5,
  legend_breaks = seq(-0.95, 0.95, by = 0.475),
  main = "Correlation of cluster-specific t's between LIBD sACC to \n ACC & PFC from (Velmeshev et al. Science 2019)"
)
dev.off()
### Session info for 22Jul2021 ==================================================
# Report the R version, platform and attached/loaded package versions for this
# run; the commented lines that follow are the output recorded under the date
# in the header above.
sessionInfo()
# R version 4.0.4 RC (2021-02-08 r79975)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: CentOS Linux 7 (Core)
#
# Matrix products: default
# BLAS: /jhpce/shared/jhpce/core/conda/miniconda3-4.6.14/envs/svnR-4.0.x/R/4.0.x/lib64/R/lib/libRblas.so
# LAPACK: /jhpce/shared/jhpce/core/conda/miniconda3-4.6.14/envs/svnR-4.0.x/R/4.0.x/lib64/R/lib/libRlapack.so
#
# locale:
# [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
# [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
# [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
# [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#
# attached base packages:
# [1] parallel stats4 stats graphics grDevices datasets utils methods
# [9] base
#
# other attached packages:
# [1] RColorBrewer_1.1-2 pheatmap_1.0.12 limma_3.46.0
# [4] jaffelab_0.99.30 rafalib_1.0.0 DropletUtils_1.10.3
# [7] batchelor_1.6.3 scran_1.18.7 scater_1.18.6
# [10] ggplot2_3.3.3 EnsDb.Hsapiens.v86_2.99.0 ensembldb_2.14.1
# [13] AnnotationFilter_1.14.0 GenomicFeatures_1.42.3 AnnotationDbi_1.52.0
# [16] SingleCellExperiment_1.12.0 SummarizedExperiment_1.20.0 Biobase_2.50.0
# [19] GenomicRanges_1.42.0 GenomeInfoDb_1.26.7 IRanges_2.24.1
# [22] S4Vectors_0.28.1 BiocGenerics_0.36.1 MatrixGenerics_1.2.1
# [25] matrixStats_0.58.0
#
# loaded via a namespace (and not attached):
# [1] googledrive_1.0.1 ggbeeswarm_0.6.0 colorspace_2.0-0
# [4] ellipsis_0.3.2 scuttle_1.0.4 bluster_1.0.0
# [7] XVector_0.30.0 BiocNeighbors_1.8.2 rstudioapi_0.13
# [10] farver_2.1.0 bit64_4.0.5 fansi_0.4.2
# [13] xml2_1.3.2 splines_4.0.4 R.methodsS3_1.8.1
# [16] sparseMatrixStats_1.2.1 cachem_1.0.4 Rsamtools_2.6.0
# [19] ResidualMatrix_1.0.0 dbplyr_2.1.1 R.oo_1.24.0
# [22] HDF5Array_1.18.1 compiler_4.0.4 httr_1.4.2
# [25] dqrng_0.3.0 assertthat_0.2.1 Matrix_1.3-4
# [28] fastmap_1.1.0 lazyeval_0.2.2 BiocSingular_1.6.0
# [31] prettyunits_1.1.1 tools_4.0.4 rsvd_1.0.5
# [34] igraph_1.2.6 gtable_0.3.0 glue_1.4.2
# [37] GenomeInfoDbData_1.2.4 dplyr_1.0.5 rappdirs_0.3.3
# [40] Rcpp_1.0.6 vctrs_0.3.8 Biostrings_2.58.0
# [43] rhdf5filters_1.2.0 rtracklayer_1.50.0 DelayedMatrixStats_1.12.3
# [46] stringr_1.4.0 beachmat_2.6.4 lifecycle_1.0.0
# [49] irlba_2.3.3 statmod_1.4.35 XML_3.99-0.6
# [52] edgeR_3.32.1 zlibbioc_1.36.0 scales_1.1.1
# [55] hms_1.0.0 ProtGenerics_1.22.0 rhdf5_2.34.0
# [58] curl_4.3 memoise_2.0.0 gridExtra_2.3
# [61] segmented_1.3-4 biomaRt_2.46.3 stringi_1.5.3
# [64] RSQLite_2.2.7 BiocParallel_1.24.1 rlang_0.4.11
# [67] pkgconfig_2.0.3 bitops_1.0-7 lattice_0.20-41
# [70] purrr_0.3.4 Rhdf5lib_1.12.1 GenomicAlignments_1.26.0
# [73] bit_4.0.4 tidyselect_1.1.1 magrittr_2.0.1
# [76] R6_2.5.0 generics_0.1.0 DelayedArray_0.16.3
# [79] DBI_1.1.1 pillar_1.6.0 withr_2.4.2
# [82] RCurl_1.98-1.3 tibble_3.1.1 crayon_1.4.1
# [85] utf8_1.2.1 BiocFileCache_1.14.0 viridis_0.6.0
# [88] progress_1.2.2 locfit_1.5-9.4 grid_4.0.4
# [91] blob_1.2.1 R.utils_2.10.1 openssl_1.4.3
# [94] munsell_0.5.0 beeswarm_0.4.0 viridisLite_0.4.0
# [97] vipor_0.4.5 askpass_1.1