-
Notifications
You must be signed in to change notification settings - Fork 0
/
functions_main.R
721 lines (547 loc) · 28.3 KB
/
functions_main.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
#' Deconvolute and annotate spatial-transcriptomics (ST) spots with a paired
#' scRNA-seq reference and record the results on an EV_spatalk object.
#'
#' Visible stages, in order:
#'   1. Drop the SCT/Spatial/integrated assays of both inputs and re-run
#'      SCTransform on the RNA assay.
#'   2. Score every spot for each entry of `EV_spatalk_object@modules`
#'      (MakeRand + GeneToEnrichment), binarize the scores at > 0.5 and label
#'      each spot with its top-scoring module (`state`).
#'   3. Subset both objects to their common genes, re-run SCTransform/RunPCA,
#'      find transfer anchors (reference = sc, query = st) and transfer the
#'      `sc.anno.id` labels, giving `pred.*` scores and `pred.*_bin` calls.
#'   4. Regress each spot on the per-cell-type average expression profile with
#'      nnls(), scale the coefficients against coefficients obtained from
#'      permuted expression, and binarize them at `coef_bin_cutoff` (`*_bin`).
#'   5. Add neighbourhood means (`*_nei`), minimum-distance proximities
#'      (`*_dist`) and a Malignant/Both/Normal category (`cat`).
#'
#' NOTE(review): in this copy of the file several `object@slot` expressions
#' appear as the literal text `[email protected]` (looks like an e-mail
#' obfuscation artefact of a web export). Those lines are not valid R as
#' written. They presumably stand for `<seurat object>@meta.data` or
#' `EV_spatalk_object@<slot>` -- restore them from the original repository
#' before running.
#'
#' NOTE(review): the defaults `st = st`, `sc = sc`,
#' `EV_spatalk_object = EV_spatalk_object` and
#' `select.celltype = select.celltype` refer to themselves, so these
#' arguments effectively have to be supplied by the caller.
#'
#' @param st Seurat object with the ST data. The body uses its `RNA` assay and
#'   the `row`, `col`, `imagerow`, `imagecol` columns of
#'   `st@images$image@coordinates`.
#' @param sc Seurat object with the scRNA-seq reference (uses its `RNA` assay).
#' @param EV_spatalk_object S4 object; its `modules` slot is read and its
#'   `nnls_bin` slot (plus the garbled slots noted above) is written.
#' @param nrand,nbin Forwarded to `MakeRand()`.
#' @param pval_thresh Not used in the visible body.
#' @param mc.cores Copied to `ncores`, which is not used in the visible body.
#' @param sc.anno.id Name of the cell-type label column of `sc`; used as the
#'   transfer reference and as identity for the average expression profile.
#' @param set.seeds Seed passed to `set.seed()`.
#' @param select.celltype Stored on the object (target slot garbled here).
#' @param pred_bin_cutoff Threshold applied to the transferred prediction
#'   scores to obtain `pred.*_bin`.
#' @param coef_bin_cutoff Threshold applied to the scaled NNLS coefficients to
#'   obtain `*_bin`.
#' @return `EV_spatalk_object` with the updated fields.
st_deco_anno <- function(st = st, sc = sc, EV_spatalk_object=EV_spatalk_object, nrand = 2, nbin = 5, pval_thresh=0.05, mc.cores=30, sc.anno.id="pop", set.seeds = 1, select.celltype = select.celltype, pred_bin_cutoff=0.9, coef_bin_cutoff=2){
# NOTE(review): ncores is assigned but never read in this function.
ncores = mc.cores
# NOTE(review): garbled left-hand side; presumably stores select.celltype in a slot of EV_spatalk_object.
[email protected] <- select.celltype
modules <- EV_spatalk_object@modules#
ma <- names(modules)
print(ma)
# Discard any pre-existing derived assays so that both objects are re-normalized from RNA.
st@assays$SCT <- NULL
st@assays$Spatial <- NULL
st@assays$integrated <- NULL
sc@assays$SCT <- NULL
sc@assays$Spatial <- NULL
sc@assays$integrated <- NULL
DefaultAssay(st) <- "RNA"
DefaultAssay(sc) <- "RNA"
print("Processing SCTransform on sc and st data")
st = SCTransform(st, return.only.var.genes = FALSE, vst.flavor = "v1")
sc = SCTransform(sc, return.only.var.genes = FALSE, vst.flavor = "v1")
DefaultAssay(st) <- "SCT"
DefaultAssay(sc) <- "SCT"
set.seed(set.seeds)
# NOTE(review): cell.types is not used again in this function.
cell.types <- unique([email protected][,sc.anno.id])
# NOTE(review): columns_to_remove is built but never used below; no columns are actually removed.
columns_to_remove <- c('subclone', 'sumsq', 'topcor', 'Cycle', 'Stress', 'Interferon',
'Hypoxia', 'pEMT', 'Squamous', 'Squamous2', 'Glandular', 'Glandular2',
'Cilium', 'Metal', 'Undetermined', 'Undetermined2', 'Glandular1',
'Squamous1', 'AC', 'OPC', 'NPC', 'Mesenchymal', 'Ductal', 'Luminal',
'Keratinocyte', 'Basal', names(modules))
# Remove these columns
ST.data <- st
ST.data.mscore <- ST.data
# Compute the module scores here
#options(Seurat.object.assay.version = 'v4')
set.seed(set.seeds)
# MakeRand / GeneToEnrichment are helpers defined outside this function.
modules_rand = MakeRand(ST.data.mscore, db = modules, nrand = nrand, nbin = nbin)
# Zero-filled spots x modules matrix used to initialise the score columns.
ini = matrix(0,nrow = ncol(ST.data.mscore), ncol = length(modules))
rownames(ini) = colnames(ST.data.mscore)
colnames(ini) = names(modules)
[email protected][,names(modules)] = as.data.frame(ini)
# Score one module at a time; an error for a module is swallowed (handler returns c()),
# presumably leaving that module's zero-initialised column in place.
for (m in names(modules)){
tryCatch(expr = {
ST.data.mscore = GeneToEnrichment(ST.data.mscore, db = modules[m], method = 'rand', db_rand = modules_rand[m])
}, error = function(e){c()})
}
scores = [email protected][,names(modules)]
scores.1 <- scores
[email protected] <- scores.1# store the module score of every spot
scores[is.na(scores)] <- 0# replace NA with 0
# NOTE(review): frequency is computed but not used afterwards.
frequency = colMeans(scores > 0.5, na.rm = TRUE)
# Label each spot with the name of its highest-scoring module.
ST.data.mscore$state = apply([email protected][,names(modules)], 1, function(x){
top = which.max(x)
return(names(x)[top])
})
ST.data.mscore$state = factor(ST.data.mscore$state, levels = names(modules))
[email protected][is.na([email protected])] <- 0# replace NA with 0
# Binary module calls: 1 when the module score exceeds 0.5, stored as <module>_bin.
ma_bin = paste0(ma, '_bin')
scores = [email protected][,ma]
scores_bin = scores
scores_bin[] = as.numeric(scores_bin > 0.5)
ST.data.mscore = AddMetaData(ST.data.mscore, metadata = scores_bin, col.name = ma_bin)
# NOTE(review): nmf is not used afterwards.
nmf <- names(modules)
st <- ST.data.mscore
st = AddMetaData(st, metadata = st@images$image@coordinates[,c('row','col')], col.name = c('row','col'))
# axis: the array row rescaled to [0, 1].
st$axis = st$row
st$axis = (st$axis - min(st$axis))/(max(st$axis) - min(st$axis))
#
print("Processing intergration of sc and st data")
srt <- sc
options(future.globals.maxSize = 5000*1024^2)
srt@assays$integrated <- NULL
srt@assays$Spatial <- NULL
st@assays$integrated <- NULL
st@assays$Spatial <- NULL
common_genes <- intersect(rownames(srt), rownames(st))
# NOTE(review): these two lines assign the value "RNA" as identity, they do not select an
# assay (the assay is selected by DefaultAssay below) -- confirm this is intended.
Idents(srt) <- "RNA"
Idents(st) <- "RNA"
DefaultAssay(srt) <- "RNA"
DefaultAssay(st) <- "RNA"
srt@assays$SCT <- NULL
st@assays$SCT <- NULL
# Images are removed before subsetting; the ST images are restored from ST.data further down.
srt@images$image <- NULL
srt@images$image.1 <- NULL
st@images$image <- NULL
st@images$image.1 <- NULL
srt <- srt[common_genes,]
st <- st[common_genes,]
#st@images$image <- ST.data.mscore@images$image # re-import the image
srt = SCTransform(srt, return.only.var.genes = FALSE, vst.flavor = "v1") %>% RunPCA() #%>% RunUMAP(dims = 1:20)
st = SCTransform(st, return.only.var.genes = FALSE, vst.flavor = "v1") %>% RunPCA() #%>% RunUMAP(dims = 1:20)
object.list = list('SC' = srt, 'ST' = st)
#
print("Take the intersection of variable genes between st and sc data")
genes.use = intersect(VariableFeatures(st), VariableFeatures(srt))
print("Processing SCT integration")
object.list = PrepSCTIntegration(object.list = object.list,
anchor.features = genes.use,
verbose = FALSE)
# Label transfer: the scRNA-seq object is the reference, the ST object is the query.
anchors = FindTransferAnchors(reference = object.list$SC, query = object.list$ST,
normalization.method = 'SCT',
features = genes.use,
verbose = FALSE, reduction = "rpca")
predictions = TransferData(anchorset = anchors, refdata = [email protected][,sc.anno.id])
# Keep only the per-class score columns and rename prediction.score* to pred*.
predictions = predictions[,!colnames(predictions) %in% c('predicted.id','prediction.score.max')]
colnames(predictions) = gsub('prediction.score','pred',colnames(predictions))
pred = colnames(predictions)
st = AddMetaData(st, metadata = predictions, col.name = pred)# adds the scRNA-seq-based prediction score of every ST spot
print("The integration of sc and st finished")
print("Binarizing predictions")
predictions_bin = predictions
predictions_bin[] = as.numeric(predictions_bin > pred_bin_cutoff)# author note: with a cutoff of 0.9 many cells are called tumour cells, which is acceptable because NNLS follows
pred_bin = paste0(colnames(predictions_bin), '_bin')
st = AddMetaData(st, predictions_bin, col.name = pred_bin)
print("Deconvoluting st spot from paired scRNA-seq data")
genes.use = intersect(VariableFeatures(st), VariableFeatures(srt))
Idents(srt) <- sc.anno.id
# prof: average SCT expression per identity class of srt; data: SCT expression per ST spot,
# both restricted to genes.use.
prof = AverageExpression(srt, assay = 'SCT', layer = 'data')$SCT[genes.use,]
data = as.matrix(GetAssayData(st, assay = 'SCT', layer = 'data'))[genes.use,]
print("nnls regression")
# One non-negative least-squares fit per spot (column of data); rows of coef are spots.
coef = t(apply(data, 2, function(y){
coef(nnls(as.matrix(prof), y))
}))
colnames(coef) = colnames(prof)
# NOTE(review): `colSums(coef > 0) >= 0` is always TRUE, so every cell type is kept and the
# refit below uses the same prof. From here on the local variable `nnls` is a character
# vector of cell-type names; the later nnls(...) calls still resolve to the function.
nnls = colnames(coef)[colSums(coef > 0) >= 0]
prof = prof[,nnls]
coef = t(apply(data, 2, function(y){
coef(nnls(as.matrix(prof), y))
}))
nnls <- gsub("-", "_", nnls, fixed = T)
colnames(coef) = nnls
st = AddMetaData(st, coef, col.name = nnls)
print("Processing nnls coefficient scaling")
#
colnames(coef) <- gsub("-", "_", colnames(coef), fixed = T)#
#x="T_cells"
# For every cell type x, standardize its coefficient against coefficients fitted on
# expression permuted across "background" spots.
coef_scaled = sapply(nnls, function(x){
vec = coef[,x]
y = paste0('pred.',x)
# Background spots: those whose value in column pred.<x> equals 0 ...
spots.use = colnames(st)[[email protected][,y] == 0]
#spots.use = colnames(st)[[email protected][,y] == 0 & rowSums(predictions_bin) == 1]
# ... or, when fewer than 5 qualify, the 5 spots with the lowest value.
if (length(spots.use) < 5){
spots.use = colnames(st)[order([email protected][,y])[1:5]]
}
# This local nrand shadows the function argument; with nrand = 1 the next `if` never fires
# and 10^1 = 10 permutation rounds are generated.
nrand=1# originally 2, i.e. 100 iterations
if(nrand==2){nrand=nrand-1}
# Each round shuffles every gene's values across the background spots; rounds are column-bound.
data_rand = Reduce(cbind, lapply(1:10^nrand, function(i){
t(apply(data[,spots.use], 1, function(expr){
sample(expr, length(expr), replace = FALSE)
}))
}))
coef_rand = t(apply(data_rand, 2, function(y){
coef(nnls(as.matrix(prof), y))
}))
colnames(coef_rand) = nnls
# Centre on the permutation mean; divide by the permutation sd, or by the smallest positive
# observed coefficient when that sd is 0.
if (sd(coef_rand[,x]) == 0){
return((coef[,x] - mean(coef_rand[,x]))/min(coef[coef[,x] > 0,x]))
} else {
return((coef[,x] - mean(coef_rand[,x]))/sd(coef_rand[,x]))
}
})# end of the standardization of the NNLS coefficients
nnls_scaled = paste0(nnls, '_scaled')# column names for the standardized NNLS coefficients (coef_scaled)
st = AddMetaData(st, coef_scaled, col.name = nnls_scaled)
print("nnls binarizing")
coef_bin = coef_scaled
coef_bin[] = (coef_bin > coef_bin_cutoff)# author note: a scaled NNLS coefficient above the cutoff (default 2) is taken to indicate that cell type
nnls_bin = paste0(nnls, '_bin')
EV_spatalk_object@nnls_bin <- nnls_bin# store nnls_bin
st = AddMetaData(st, coef_bin, col.name = nnls_bin)
nnls_scaled = paste0(nnls, '_scaled')
nnls_bin = paste0(nnls, '_bin')
# Macrophage- and T-cell-specific handling is not added here for now
# Restore the images that were removed before the gene subsetting.
st@images<- ST.data@images
print("add neighboor")
DefaultAssay(st) <- "SCT"
nei = FindSTNeighbors(st, d_min = 0, d_max = 1.5)# author note: FindSTNeighbors is to be found in seurat_fucntions_public.R
[email protected] <- nei# store the neighbourhood
# First define the IDs of the spots around each spot, look those spots up in the metadata,
# and take the share (mean) of each cell type among them.
# presumably nei holds, per spot, the IDs of its neighbouring spots -- verify against FindSTNeighbors.
coef_nei = sapply(nnls_bin, function(x){
#spot = nei[1]
sapply(nei, function(spot){
y = [email protected][spot,x]
y[y < 0] = 0
mean(y, na.rm = TRUE)
})
})
colnames(coef_nei) = nnls
nnls_nei = paste0(nnls, '_nei')
st = AddMetaData(st, coef_nei, col.name = nnls_nei)
print("Add cell distance information from ST")
coord = st@images$image@coordinates[,c('imagerow','imagecol')]
prox = 'inverse'
distances = as.matrix(dist(coord))
distances = distances/min(distances[distances > 0])# normalize the matrix by the smallest non-zero distance
distance.all.spot <- 1/(1+distances)
[email protected] <- as.data.frame(distance.all.spot)# store the distances
#
# For every cell type: distance from each spot to the nearest spot where <type>_bin is set.
coef_dist = sapply(nnls_bin, function(x){
w = colnames(st)[as.logical([email protected][,x])]
w = w[!is.na(w)]
if (length(w) == 1){
mi = as.numeric(distances[,w])
} else {
mi = apply(distances[,w], 1, function(x){min(as.numeric(x), na.rm = TRUE)})
}
return(mi)
})
if (prox == 'inverse'){
coef_dist = 1/(1+coef_dist)
}
# NOTE(review): prox is fixed to 'inverse' above, so this branch is never taken.
if (prox == 'opposite'){
coef_dist = -coef_dist
}
colnames(coef_dist) = nnls
nnls_dist = paste0(nnls, '_dist')# likewise, nnls_dist covers all cell types
st = AddMetaData(st, coef_dist, col.name = nnls_dist)
print("Cell categorization|Classify all cells into normal, both and malignant")
# Categories
# 'Malignant': only Malignant_bin is set; 'Both': Malignant_bin plus at least one other call;
# 'Normal': some call but not Malignant_bin; NA: no call at all.
# Requires a column named exactly 'Malignant_bin' among nnls_bin.
st$cat = apply([email protected][,nnls_bin], 1, function(x){
if (x['Malignant_bin']){
if (sum(x) == 1){
return('Malignant')
} else {
return('Both')
}
} else {
if (sum(x) == 0){
return(NA)
} else {
return('Normal')
}
}
})
cats = c('Malignant','Both','Normal')
st$cat = factor(st$cat, levels = cats)
#add new seurat slot
#slot(EV_spatalk_object, "st.seurat.obj") <- st # replace with your actual ST Seurat object
#slot(EV_spatalk_object, "sc.seurat.obj") <- sc # replace with your actual SC Seurat object
#add your gene signature of cell states
[email protected] <- st# store the updated st
[email protected] <- sc# store the updated sc
EV.spatalk.results <- EV_spatalk_object
return(EV.spatalk.results)
}
#
#' Compute ligand-receptor (LR) interaction tables for the niche of every spot.
#'
#' For each cell type in `EV_spatalk_object@nnls_bin` the function finds, for
#' every spot, the distance to and the ID of the nearest spot where that
#' `*_bin` column is set. For every spot it then takes the nearest spot of the
#' sender type and of the receiver type (`s.cell.type[1]`, `s.cell.type[2]`),
#' extracts their SCT expression, and runs `rawParse2()` / `FindLR2()` over
#' each communication type in `comm_list`. The interaction strength is stored
#' in a `multiply` column and the table is sorted by it in decreasing order.
#'
#' NOTE(review): in this copy of the file several `object@slot` expressions
#' appear as the literal text `[email protected]` (looks like an e-mail
#' obfuscation artefact of a web export). Those lines are not valid R as
#' written; restore them from the original repository before running.
#'
#' @param EV_spatalk_object S4 object; its `nnls_bin` slot is read here. The
#'   default refers to a variable `EV.spatalk.results` that must exist in the
#'   calling scope.
#' @param prox Only `"inverse"` has an effect: distances become `1/(1+d)`.
#' @param mc.cores Number of cores passed to `mclapply()`.
#' @param s.cell.type Character vector; element 1 is the sender cell type and
#'   element 2 the receiver cell type.
#' @param comm_list Communication types iterated over and passed to
#'   `FindLR2()` as `comm_type`. NOTE(review): the default `comm_list =
#'   comm_list` refers to itself, so the caller effectively has to supply it.
#' @param datatype NOTE(review): not used; the `FindLR2()` call hard-codes
#'   `datatype='mean count'`.
#' @param method `"pseudocount"` uses `calculate_interaction_strength()`;
#'   `"weighted.sum"` uses `log2(from+1) + log2(to+1)`. NOTE(review): for any
#'   other value no `multiply` column is created before the sort.
#' @return `EV_spatalk_object` with the results assigned to it (target slot
#'   names are garbled in this copy).
find_niche_LR <- function(EV_spatalk_object=EV.spatalk.results, prox="inverse", mc.cores=30, s.cell.type = c("Malignant", "T_cells"), comm_list=comm_list, datatype='mean count', method="pseudocount"){
numCores = mc.cores
# NOTE(review): garbled right-hand side; presumably the ST Seurat object stored on EV_spatalk_object.
st <- [email protected]
[email protected] <- s.cell.type# store s.cell.type
nnls_bin <- EV_spatalk_object@nnls_bin
coord = [email protected]@images$image@coordinates[,c('imagerow','imagecol')]
distances = as.matrix(dist(coord))
distances = distances/min(distances[distances > 0])# normalize the matrix by the smallest non-zero distance
# Pass 1: for every cell type, distance from each spot to the nearest spot with <type>_bin set.
# (mi.ID is computed here as well but only mi is returned.)
coef_dist = sapply(nnls_bin, function(x){
w = colnames(st)[as.logical([email protected]@meta.data[,x])]
w = w[!is.na(w)]
if (length(w) == 1){
mi = as.numeric(distances[,w])
mi.ID = w
} else {
mi = apply(distances[,w], 1, function(x){min(as.numeric(x), na.rm = TRUE)})
mi.ID = apply(distances[,w], 1, function(x){names(which.min(x))})
}
return(mi)
#return(mi.ID)
})
if (prox == 'inverse'){
coef_dist = 1/(1+coef_dist)
}
colnames(coef_dist) = nnls_bin
print("finding nearest spot id")
# Pass 2: same search as above, but returning the ID of the nearest spot instead of the distance.
coef_id = sapply(nnls_bin, function(x){
w = colnames(st)[as.logical([email protected]@meta.data[,x])]
w = w[!is.na(w)]
if (length(w) == 1){
mi = as.numeric(distances[,w])
mi.ID = w
} else {
mi = apply(distances[,w], 1, function(x){min(as.numeric(x), na.rm = TRUE)})
mi.ID = apply(distances[,w], 1, function(x){names(which.min(x))})
}
#return(mi)
return(mi.ID)
})
# Strip the "_bin" suffix so that columns are named by cell type.
colnames(coef_dist) <- gsub("_bin", "", colnames(coef_dist), fixed = T)
colnames(coef_id) <- gsub("_bin", "", colnames(coef_id), fixed = T)
list.results <- list()
list.results[["cell_dist"]] <- as.data.frame(coef_dist)
list.results[["cell_id"]] <- as.data.frame(coef_id)
# list.results holds two tables (nearest distance and nearest spot ID) and is ready for export
[email protected] <- list.results # store the nearest distance and the nearest spot
#
#s.cell.type <- c("Malignant", "T_cells")# the cell types can be customized here
sender.receiver.bi.res <- [email protected]@meta.data[,c(paste(s.cell.type, "_bin", sep = ""))]
sender.cell.id <- rownames(sender.receiver.bi.res[sender.receiver.bi.res[,1]==1,])# spot IDs of the sender
receiver.cell.id <- rownames(sender.receiver.bi.res[sender.receiver.bi.res[,2]==1,])# spot IDs of the receiver
[email protected] <- sender.cell.id# store all sender spot IDs
[email protected] <- receiver.cell.id# store all receiver spot IDs
#s.cell.type <- s.cell.type
print(paste0("Sender:", s.cell.type[1], " | ", "Receiver:", s.cell.type[2]))
nitch.id <- rownames(list.results$cell_id)# IDs of all spots
all.nitch.LR.talk.list <- list()
# Parallel computation with mclapply
#s.cell.type <- nnls_bin
all.nitch.LR.talk.list <- mclapply(nitch.id, function(i) {
# For spot i: the nearest spot of each requested cell type, labelled "<spot id>|<cell type>".
nitch.anno <- cbind.data.frame(cell.id=as.character(list.results$cell_id[i,][,s.cell.type]),
anno=names(list.results$cell_id[i,][,s.cell.type]))
nitch.anno$cb.cell.id <- paste(nitch.anno$cell.id, nitch.anno$anno, sep = "|")
# SCT expression of those spots, one row per spot, with the label as cell_type.
iTalk_data <- as.data.frame(t(st@assays$SCT$data[,nitch.anno$cell.id]))
rownames(iTalk_data) <- nitch.anno$cb.cell.id
iTalk_data$cell_type <- nitch.anno$cb.cell.id
unique(iTalk_data$cell_type)
#unique(iTalk_data$compare_group)
highly_exprs_genes <- rawParse2(iTalk_data, top_genes=200, stats="mean")# author note: this could be changed to 10000
# Communication types
#comm_list<-c('growth factor','other','cytokine','checkpoint')# these are all of them; can be customized
#comm_list<-c('checkpoint')
# NOTE(review): cell_types is not used afterwards.
cell_types <- unique(iTalk_data$cell_type)
#cell_col <- structure(my10colors[1:length(cell_types)], names=cell_types)
#comm_type = comm_list[1]
#database <- database[database$Classification == comm_type,]# this needs to be replaced with the CellChat database
#comm_list= c("Cell-Cell Contact", "ECM-Receptor", "Secreted Signaling")
#comm_list= comm_list[c(1, 2, 4)]
iTalk_res <- NULL
for(comm_type in comm_list){
res_cat <- FindLR2(highly_exprs_genes, datatype='mean count', comm_type=comm_type)# author note: if this is not right, switch to FindLR2 as needed
iTalk_res <- rbind(iTalk_res, res_cat)
}
if(method=="weighted.sum"){
iTalk_res$multiply <- log2(iTalk_res$cell_from_mean_exprs+1) + log2(iTalk_res$cell_to_mean_exprs+1)# this step is the weighted sum
}
#iTalk_res$multiply <- log2(iTalk_res$cell_from_mean_exprs+1) + log2(iTalk_res$cell_to_mean_exprs+1)# this step is the weighted sum
#pesudocount
# Apply the function to compute the interaction strength
if(method=="pseudocount"){
iTalk_res$multiply <- calculate_interaction_strength(iTalk_res$cell_from_mean_exprs, iTalk_res$cell_to_mean_exprs)#pseudocount method
}
iTalk_res <- iTalk_res[order(iTalk_res$multiply, decreasing = T),]
# Return iTalk_res
return(iTalk_res)
}, mc.cores = numCores)
names(all.nitch.LR.talk.list) <- nitch.id
#
[email protected] <- all.nitch.LR.talk.list# once sender and receiver are fixed, store the ligand-receptor expression of all spots
return(EV_spatalk_object)
}
# Find the ligand-receptor (LR) interactions that are present across all spots
#' Restrict the per-spot ligand-receptor (LR) tables to the sender -> receiver
#' direction and store them on the object.
#'
#' For every per-spot LR table the function builds an `interaction` label
#' (`<ligand>_<receptor>`) and a `direction` label from the text after the
#' last "|" in `cell_from` and `cell_to`, keeps only rows whose direction
#' equals `<s.cell.type[1]>|<s.cell.type[2]>`, and returns the columns
#' `interaction`, `direction`, `multiply`. The list for all spots and its
#' subset for the sender spots are both assigned to the object.
#'
#' NOTE(review): in this copy of the file several `object@slot` expressions
#' appear as the literal text `[email protected]` (looks like an e-mail
#' obfuscation artefact of a web export). Those lines are not valid R as
#' written; restore them from the original repository before running.
#'
#' @param EV_spatalk_object S4 object carrying the per-spot LR results. The
#'   default refers to a variable `EV.spatalk.results` that must exist in the
#'   calling scope.
#' @param mc.cores Number of cores passed to `mclapply()`.
#' @return `EV_spatalk_object` with the filtered LR lists assigned to it.
find.inter.LR <- function(EV_spatalk_object=EV.spatalk.results, mc.cores=30){
s.cell.type <- [email protected]
# NOTE(review): sender.receiver.bi.res is not used again in this function.
sender.receiver.bi.res <- [email protected]@meta.data[,c(paste(s.cell.type, "_bin", sep = ""))]
Sender.spot.id <- [email protected]# spot IDs of the sender
Receiver.spot.id <- [email protected]# spot IDs of the receiver
all.nitch.LR.talk.list <- [email protected]
lr_interaction_list <- list()
# (1) Go through the LR interactions of every spot and extract the multiply values;
# from all spots keep only the ligand-receptor pairs in the sender|receiver direction
# (Malignant_bin|T_cells_bin in the author's example)
direction = paste(s.cell.type[1], s.cell.type[2], sep = "|")
print(paste("The direction of your EV-mediated cell-cell interation pattern is", direction, sep = ": "))
#c("Malignant_bin|T_cells_bin")# the direction must be set properly
# at this step lr_interaction_list could be restricted to different spots
lr_interaction_list <- mclapply(names(all.nitch.LR.talk.list), function(spot_id) {
lr_data <- all.nitch.LR.talk.list[[spot_id]]
lr_data$interaction <- paste(lr_data$ligand, lr_data$receptor, sep = "_")
# Extract the text after the "|" in cell_from and cell_to
extract_after_pipe <- function(string) {
sub(".*\\|", "", string)
}
# Combine the extracted text into the direction label
lr_data$direction <- paste(
sapply(lr_data$cell_from, extract_after_pipe),
sapply(lr_data$cell_to, extract_after_pipe),
sep = "|"
)
lr_data <- lr_data[lr_data$direction==direction,]
return(lr_data[, c("interaction", "direction", "multiply")])
}, mc.cores = mc.cores)
# from all spots keep only the ligand-receptor pairs in the chosen direction
names(lr_interaction_list) <- names([email protected])
[email protected]_interaction <- lr_interaction_list # store, for all spots, the sender>receiver LR table
#pick the malignnat as send cell
[email protected]_interaction <- lr_interaction_list[Sender.spot.id]# store, for the sender spots only, the sender>receiver LR table
return(EV_spatalk_object)
}
#
#' Compute per-LR statistics (distance correlation, EV-release correlation,
#' spot frequency, robust rank aggregation) and store them on the object.
#'
#' Visible stages, in order:
#'   1. For every ligand-receptor (LR) interaction, Pearson-correlate its
#'      `multiply` values (via `get_lr_multiply()`) with the sender spots'
#'      `cell_dist` value for the receiver type, and with the sender spots'
#'      `EV_release` metadata column.
#'   2. Keep LRs with p < 0.05 in both tests and the same sign of correlation
#'      (`common_positive_LR`, `common_negative_LR`).
#'   3. Count, per spot, the LR rows with `multiply > 0`; count, per LR, the
#'      spots in which it occurs (via `increment_frequency()`).
#'   4. Run `aggregateRanks(method = "RRA")` on the per-spot interaction lists.
#'   5. Column-bind the RRA, distance and EV-release results for the LRs
#'      present in all three into `EV_spatalk_object@EV_spatalk_stat_results`.
#'
#' NOTE(review): in this copy of the file several `object@slot` expressions
#' appear as the literal text `[email protected]` (looks like an e-mail
#' obfuscation artefact of a web export). Those lines are not valid R as
#' written; restore them from the original repository before running.
#'
#' NOTE(review): `s.cell.type` is used in the first loop but is neither an
#' argument nor assigned in this function, so it has to exist in an enclosing
#' or global environment -- confirm where it is meant to come from.
#'
#' @param EV_spatalk_object S4 object carrying the LR lists and nearest-spot
#'   tables. The default refers to a variable `EV.spatalk.results` that must
#'   exist in the calling scope.
#' @param mc.cores Not used in the visible body.
#' @param seeds Seed passed to `set.seed()`.
#' @return `EV_spatalk_object` with the statistics assigned to it.
find_EV_spatalk_LR <- function(EV_spatalk_object=EV.spatalk.results, mc.cores=30, seeds=1){
#lr_interaction_list <- [email protected]_interaction
#sender.cell.id <- [email protected]
lr_interaction_list <- [email protected]_interaction
# Union of all interaction labels over the spots in lr_interaction_list.
all_lr <- unique(unlist(lapply(lr_interaction_list, function(df) df$interaction)))
list.results <- [email protected]
sender.cell.id <- [email protected]
# Initialise the result list
correlation_results <- list()
# Loop over all LR pairs
for (lr in all_lr) {
# Extract the multiply values of this LR across the spots
multiply_values <- get_lr_multiply(lr, lr_interaction_list)
# Extract, for the sender spots, the distance value of the receiver cell type
# (T_cells_bin in the author's example); s.cell.type is not defined in this function.
distances <- list.results$cell_dist[sender.cell.id, s.cell.type[2]]
# Correlate the multiply values with the distances
cor_test <- cor.test(multiply_values, distances, method = "pearson")
# Save the correlation result
correlation_results[[lr]] <- tidy(cor_test)
}
# Convert the results to a data frame
distance.correlation_results_df <- bind_rows(correlation_results, .id = "LR")
[email protected] <- as.data.frame(distance.correlation_results_df)
# Next, add the correlation with EV_release (and, per the author's note, differential-analysis results).
correlation_results <- list()
# Loop over all LR pairs
for (lr in all_lr) {
# Extract the multiply values of this LR across the spots
multiply_values <- get_lr_multiply(lr, lr_interaction_list)
# Extract the EV_release score of the sender spots
EV.release.score <- [email protected]@meta.data[sender.cell.id,]$EV_release
# Correlate the multiply values with the EV_release score
cor_test <- cor.test(multiply_values, EV.release.score, method = "pearson")
# Save the correlation result
correlation_results[[lr]] <- tidy(cor_test)
}
# Convert the results to a data frame
EVrelease.correlation_results_df <- bind_rows(correlation_results, .id = "LR")
[email protected] <- as.data.frame(EVrelease.correlation_results_df)
# Keep results with a p-value below 0.05
significant_distance <- subset(distance.correlation_results_df, p.value < 0.05)
significant_EVrelease <- subset(EVrelease.correlation_results_df, p.value < 0.05)
# Split into positive and negative correlations
positive_distance <- subset(significant_distance, estimate > 0)
negative_distance <- subset(significant_distance, estimate < 0)
positive_EVrelease <- subset(significant_EVrelease, estimate > 0)
negative_EVrelease <- subset(significant_EVrelease, estimate < 0)
# Find the LRs that are positive in both tests or negative in both tests
common_positive_LR <- intersect(positive_distance$LR, positive_EVrelease$LR)# positive correlation
common_negative_LR <- intersect(negative_distance$LR, negative_EVrelease$LR)# negative correlation
#
cat("Number of LRs with positive correlation in both:", length(common_positive_LR), "\n")
cat("Number of LRs with negative correlation in both:", length(common_negative_LR), "\n")
inter.LR.results <- list()
inter.LR.results[["common_positive_LR"]] <- common_positive_LR
inter.LR.results[["common_negative_LR"]] <- common_negative_LR
[email protected] <- inter.LR.results
# Count the LRs present in each spot
# i.e. the number of LRs with multiply > 0 in every spot
set.seed(seeds)
LR.list <- [email protected]_interaction
interaction_frequencies <- numeric(length(LR.list))
# Loop over every element of LR.list
for (i in seq_along(LR.list)) {
# Get the current data.frame
current_df <- LR.list[[i]]
# Count the rows with multiply greater than 0
interaction_frequencies[i] <- sum(current_df$multiply > 0)
}
# Create a data.frame to hold the result
# NOTE(review): result_df is not used afterwards; the same counts are recomputed with sapply below.
result_df <- data.frame(Spot = seq_along(LR.list), Frequency = interaction_frequencies)
# Print the result
#print(result_df)
# Helper that counts the rows with multiply greater than 0 in one data frame
count_interactions <- function(df) {
sum(df$multiply > 0)
}
# Apply the helper to every element of LR.list
interaction_counts <- sapply(LR.list, count_interactions)
# Data frame with every spot and its number of interactions
spot.LR.freq.results <- cbind.data.frame(Spot = names(interaction_counts), Frequency = interaction_counts)
[email protected] <- spot.LR.freq.results
# Frequency of every LR across spots, and RRA #aggregateRanks {RobustRankAggreg}
# frequency of each LR over all spots
#>
# NOTE(review): the LR universe is taken from the first spot only -- confirm that every
# spot's table lists the same interactions.
lr_interactions <- unique(LR.list[[1]]$interaction)
# Initialize a named vector to store the frequency of non-zero 'multiply' values for each LR
lr_frequencies <- setNames(rep(0, length(lr_interactions)), lr_interactions)
# Function to increment frequency count for non-zero 'multiply' values, counting each interaction only once per spot
# (increment_frequency is defined outside this function)
# Calculate frequencies across all spots
for (spot_id in names(LR.list)) {
lr_frequencies <- increment_frequency(LR.list[[spot_id]], lr_frequencies)
}
# Create a data.frame for the frequencies
LR.in.spot.frequency_table <- data.frame(LR = names(lr_frequencies), Freq.num = lr_frequencies, Frequency=lr_frequencies/length(LR.list))
# Print the result
#print(LR.in.spot.frequency_table)
[email protected]_table <- as.data.frame(LR.in.spot.frequency_table)
#>
# The RRA method computes a p-value from how these LRs rank across spots.
# First turn LR.list into the list used for RRA, named RRA.list: its names are the spot
# names and each entry holds the interactions (LR names, already sorted by expression).
# Then apply the RRA algorithm to obtain an aggregated ranking.
set.seed(seeds)
RRA.list <- list()
# Extract and store the sorted interactions for each spot
for (spot_id in names(LR.list)) {
# Assuming that higher 'multiply' values are better and have been pre-sorted in descending order
RRA.list[[spot_id]] <- LR.list[[spot_id]]$interaction
}
# NOTE(review): N is set to the number of spots (lists); confirm this matches what
# aggregateRanks expects for N.
LR.in.spot.RRA.results <- aggregateRanks(RRA.list, method="RRA", full = T, N=length(RRA.list))
[email protected] <- as.data.frame(LR.in.spot.RRA.results)
# Assemble the EV_spatalk_stat_results statistics: RRA, distance correlation, EV-release correlation
Distance.corr.LR.results <- [email protected]
rownames(Distance.corr.LR.results) <- Distance.corr.LR.results$LR
colnames(Distance.corr.LR.results) <- paste0("dist_", colnames(Distance.corr.LR.results))
Distance.corr.LR.results <- Distance.corr.LR.results[,c("dist_estimate", "dist_statistic", "dist_p.value")]
EVrelease.corr.LR.results <- [email protected]
rownames(EVrelease.corr.LR.results) <- EVrelease.corr.LR.results$LR
colnames(EVrelease.corr.LR.results) <- paste0("EV.release_", colnames(EVrelease.corr.LR.results))
EVrelease.corr.LR.results <- EVrelease.corr.LR.results[,c("EV.release_estimate", "EV.release_statistic", "EV.release_p.value")]
colnames(LR.in.spot.RRA.results) <- paste0("RRA_", colnames(LR.in.spot.RRA.results))
colnames(LR.in.spot.RRA.results)[1] <- "LR_pairs_ID"
# Only LRs present in all three tables are kept in the combined result.
inter.rowname <- intersect(rownames(Distance.corr.LR.results), rownames(EVrelease.corr.LR.results))
inter.rowname <- intersect(inter.rowname, rownames(LR.in.spot.RRA.results))
EV_spatalk_stat_results <- cbind.data.frame(LR.in.spot.RRA.results[inter.rowname,], Distance.corr.LR.results[inter.rowname,], EVrelease.corr.LR.results[inter.rowname,])
EV_spatalk_object@EV_spatalk_stat_results <- as.data.frame(EV_spatalk_stat_results)
return(EV_spatalk_object)
}
#
#' Build a spot x LR interaction-score table and add it to the ST metadata.
#'
#' Collects every LR label found in the distance- and EV-release-correlation
#' tables, obtains one row of scores per spot with `process_spot()` (run in
#' parallel), stores the raw table in
#' `EV_spatalk_object@interaction_df_raw.indensity`, rescales every column as
#' `(x - min(x)) / max(x)`, replaces NaN by 0, adds the scaled columns to the
#' ST object's metadata with `AddMetaData()` and stores the scaled table in
#' `EV_spatalk_object@interaction_df`.
#'
#' NOTE(review): in this copy of the file several `object@slot` expressions
#' appear as the literal text `[email protected]` (looks like an e-mail
#' obfuscation artefact of a web export). Those lines are not valid R as
#' written; restore them from the original repository before running.
#'
#' @param EV_spatalk_object S4 object carrying the correlation tables and the
#'   per-spot LR list. The default refers to a variable `EV.spatalk.results`
#'   that must exist in the calling scope.
#' @param mc.cores Number of cores passed to `mclapply()`.
#' @return `EV_spatalk_object` with the raw and scaled score tables assigned.
add_interaction_score <- function(EV_spatalk_object=EV.spatalk.results, mc.cores=30){
# Pick an LR for visualisation,
# for example "CD86_CTLA4"
distance.correlation_results_df <- [email protected]
EVrelease.correlation_results_df <- [email protected]
common_LR.id <- c(EVrelease.correlation_results_df$LR, distance.correlation_results_df$LR)# all candidate LRs, not filtered by p-value
common_LR.id <- unique(common_LR.id)
# Extract the LRs of the significant common_positive_LR set
#PVR_TIGIT
#lr_interaction_list.raw$`AAACACCAATAACTGC-1`
# Assuming `common_LR.id` is a character vector of the common ligand-receptor IDs
# And `lr_interaction_list.raw` is your list of data frames
# Initialize an empty matrix with rows as spot IDs and columns as common LR ids
lr_interaction_list.raw <- [email protected]_interaction
# NOTE(review): this pre-allocated matrix is overwritten by do.call(rbind, results) below.
interaction_matrix <- matrix(NA, nrow = length(lr_interaction_list.raw), ncol = length(common_LR.id))
rownames(interaction_matrix) <- names(lr_interaction_list.raw)
colnames(interaction_matrix) <- common_LR.id
# Determine / set the number of cores to use
#no_cores <- detectCores() - 1
# Process every spot in parallel with mclapply (process_spot is defined outside this function)
results <- mclapply(names(lr_interaction_list.raw), process_spot, lr_list = lr_interaction_list.raw, lr_ids = common_LR.id, mc.cores = mc.cores)
# Convert the results to a matrix
interaction_matrix <- do.call(rbind, results)
# Convert to a data frame
interaction_df_raw.indensity <- as.data.frame(interaction_matrix)
rownames(interaction_df_raw.indensity) <- names(lr_interaction_list.raw)
EV_spatalk_object@interaction_df_raw.indensity <- interaction_df_raw.indensity
interaction_df <- as.data.frame(interaction_matrix)
# Column-wise rescaling. NOTE(review): the denominator is max(x), not max(x) - min(x) --
# confirm this is the intended scaling.
interaction_df <- apply(interaction_df, 2, function(x){(x-min(x))/max(x)})
# If needed, set the rownames to the spot IDs
rownames(interaction_df) <- names(lr_interaction_list.raw)
# Entries that compare equal to the string "NaN" (e.g. 0/0 from an all-zero column) become 0.
interaction_df[interaction_df=="NaN"] <- 0
# Merge this data frame with the metadata of the Seurat object
# Ensure that the rownames of your Seurat metadata match the spot IDs
[email protected] <- AddMetaData([email protected], metadata = interaction_df)
EV_spatalk_object@interaction_df = as.data.frame(interaction_df)# add the all-spot LR interaction score of every spot
return(EV_spatalk_object)
}