-
Notifications
You must be signed in to change notification settings - Fork 1
/
meristem-analysis-2.py
105 lines (88 loc) · 4.75 KB
/
meristem-analysis-2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
### 首先是在R中提取PCA降维
"""
library(Seurat)
scRNA <- readRDS("../scRNA-SMC-Cluster.rds")
DefaultAssay(scRNA) <- 'RNA'
# 输出该细胞类型的count矩阵 cell X gene
write.table(t(as.matrix(GetAssayData(object = scRNA, slot = "counts"))),
'SMC_counts.csv',
sep = ',', row.names = T, col.names = T, quote = F)
# 保存降维的数据
embed_pca <- Embeddings(scRNA,"pca")
write.table(embed_pca, "SMC_embed_pca.csv", sep = ',', row.names = T, col.names = T, quote = F)
"""
### 开始分析内容
# 加载所需的python包
import palantir
import scanpy as sc
import numpy as np
import os
import pandas as pd
# Plotting
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# 加载数据集 ------------------------------------------------------------------
counts = palantir.io.from_csv('SMC_counts.csv')
counts
# 数据的归一化和标准化 ---------------------------------------------------------
norm_df = palantir.preprocess.normalize_counts(counts)
norm_df = palantir.preprocess.log_transform(norm_df)
# 数据的PCA降维 --------------------------------------------------------------
# 💡 PCA reduction, 这里注意需要导入之前Seurat中Harmony的降维数据
# pca_projections, _ = palantir.utils.run_pca(norm_df, use_hvg=False)
pca_projections = pd.read_csv("SMC_embed_pca.csv", header=0, index_col=0)
# Run diffusion maps
dm_res = palantir.utils.run_diffusion_maps(pca_projections, n_components=50)
ms_data = palantir.utils.determine_multiscale_space(dm_res)
# MAGIC imputation:使用MAGIC算法对单细胞的表达数据进行imputation处理
imp_df = palantir.utils.run_magic_imputation(norm_df, dm_res)
# tSNE降维与可视化 ------------------------------------------------------------
tsne = palantir.utils.run_tsne(ms_data)
fig, ax = palantir.plot.plot_tsne(tsne)
plt.savefig('scRNA-SMC-Palantir-TSNE.pdf')
# 绘制特征基因的表达谱 --------------------------------------------------------
# Os03g0155500:OsEXPA25 Os01g0248900:EXPA8 Os10g0578200:CSLD1
fig = palantir.plot.plot_gene_expression(imp_df, tsne, ['Os08g0532800','Os03g0181500','Os07g0194300','Os02g0814200'])
plt.savefig('scRNA-SMC-CZ-Gene.pdf')
fig = palantir.plot.plot_gene_expression(imp_df, tsne, ['Os12g0274700','Os01g0760900','Os01g0914300','Os10g0554800','Os11g0150400',
'Os10g0159700','Os01g0638000','Os03g0845000','Os02g0729700'])
plt.savefig('scRNA-SMC-PZ-Gene.pdf')
# 对降维后的数据进行聚类分群 ----------------------------------------------------
# 数据聚类
clusters = palantir.utils.determine_cell_clusters(pca_projections, k=200)
# 聚类结果可视化
fig = palantir.plot.plot_cell_clusters(tsne, clusters)
plt.savefig('scRNA-SMC-Palantir-Cluster.pdf')
# 运行Palantir进行分化发育轨迹推断 -----------------------------------------------
## 可以指定一个近似的最早的起始细胞来运行Palantir。Palantir可以自动确定终末分化状态的细胞,
## 也可以使用termine_states参数指定它们。
## 💡这里可以把之前的TSNE结果给输出出来,然后在EXCEL中进行作图,这样就可以很容易把最前面的cell的名字给找出来
tsne.to_csv("scRNA-SMC-Palantir-TSNE.csv",index=True,sep=',')
# 运行Palantir
start_cell = 'scRNA-SAM_TCAGTCCCAGTAACGG-1'
terminal_states = pd.Series(['CZ', 'PZ'],index=['scRNA-SAM_TCAAGCATCATAAGGA-1','scRNA-SAM_TGAGGTTAGGCTAGCA-1'])
pr_res = palantir.core.run_palantir(ms_data, start_cell, num_waypoints=500,
terminal_states=terminal_states.index, use_early_cell_as_start=True)
# 根据已有的生物学知识对终末分化的细胞进行重命名, 这里需要根据自己的具体情况修改
mapping = pd.Series(index=['CZ', 'PZ'])
mapping['CZ'] = "scRNA-SAM_TCAAGCATCATAAGGA-1"
mapping['PZ'] = "scRNA-SAM_TGAGGTTAGGCTAGCA-1"
mapping = pd.Series(mapping.index, index=mapping)
pr_res.branch_probs.columns = mapping[pr_res.branch_probs.columns]
pr_res.branch_probs = pr_res.branch_probs.loc[:, ['CZ', 'PZ']]
# 可视化Palantir的结果
fig = palantir.plot.plot_palantir_results(pr_res, tsne)
plt.savefig('scRNA-SMC-Palantir-Result.pdf')
# 基因表达趋势分析 ------------------------------------------------------------------
# Palantir使用Generalized Additive Models(GAMs)模型计算基因在不同分化细胞中的表达趋势
# 选择特征基因
genes = ['Os08g0532800','Os03g0181500','Os07g0194300','Os02g0814200']
# 计算基因的表达趋势
gene_trends = palantir.presults.compute_gene_trends(pr_res, imp_df.loc[:, genes])
# 基因表达趋势的可视化
fig = palantir.plot.plot_gene_trends(gene_trends)
plt.savefig('scRNA-SMC-Palantir-CZ-GeneTrends.pdf')
# 绘制基因表达趋势热图
fig = palantir.plot.plot_gene_trend_heatmaps(gene_trends)
plt.savefig('palantir_gene_trends_heatmaps.pdf')