-
Notifications
You must be signed in to change notification settings - Fork 0
/
step4_Batch_DE.R
186 lines (178 loc) · 7.84 KB
/
step4_Batch_DE.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
library("sva")
library("DESeq2")
library("RColorBrewer")
library("IHW")
library("clusterProfiler")
library("org.Mm.eg.db")
library("enrichplot")
library("stringr")
library("pheatmap")
is_mt <- function(x)
{
grepl('mt-',x)
}
directory <- "../upload/processeddata/"
sampleFiles_1 <- grep("hcg",list.files(directory),value=TRUE)
sampleName <- sub(".hcg.*","\\1",sampleFiles)
sampleCondition <- c("P815_M_dark","P815_M_dark","P815_M_dark","P815_M_light","P815_M_light",
"P815_M_light","P815_IFNgama_dark","P815_IFNgama_dark","P815_IFNgama_dark",
"P815_IFNgama_light","P815_IFNgama_light","P815_IFNgama_light")
# batch correct
sampleTable <- data.frame(sampleName = sampleName,
fileName = sampleFiles,
condition = sampleCondition)
sampleTable$condition <- factor(sampleTable$condition)
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
directory = directory,
design= ~ condition)
keep <- !sapply(ddsHTSeq@rowRanges@partitioning@NAMES, is_mt)
ddsHTSeq <- ddsHTSeq[keep,]
mtx <- ddsHTSeq@assays@data$counts
batch <- c(1,1,1,1,1,1,1,1,0,1,1,0)
cov1 <- c(1,1,1,0,0,0,1,1,1,0,0,0)
cov2 <- c(0,0,0,0,0,0,1,1,1,1,1,1)
covar <- cbind(cov1,cov2)
adjusted_mtx <- ComBat_seq(mtx,batch = batch, covar_mod = covar)
mode(adjusted_mtx) <- "integer"
# f1 vs f2
index <- c(1,2,3,4,5,6)
sampleTable <- data.frame(sampleName = sampleName[index],
fileName = sampleFiles[index],
condition = sampleCondition[index])
sampleTable$condition <- factor(sampleTable$condition)
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
directory = directory,
design= ~ condition)
keep <- !sapply(ddsHTSeq@rowRanges@partitioning@NAMES, is_mt)
ddsHTSeq <- ddsHTSeq[keep,]
ddsHTSeq@assays@data$counts <- adjusted_mtx[,index]
keep <- rowSums(counts(ddsHTSeq)) >= 10
ddsHTSeq <- ddsHTSeq[keep,]
dds <- DESeq(ddsHTSeq)
res <- results(dds, filterFun=ihw)
vsd <- vst(dds, blind=FALSE)
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix(sampleDists)
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,clustering_distance_rows=sampleDists,clustering_distance_cols=sampleDists,
col=colors,filename ='../result/f1_f2_sample.png')
select <- res$padj <= 0.05 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1
gene_f1_f2 <- rownames(res)[select]
# m2 vs m1
index <- c(7,8,9,10,11,12)
sampleTable <- data.frame(sampleName = sampleName[index],
fileName = sampleFiles[index],
condition = sampleCondition[index])
sampleTable$condition <- factor(sampleTable$condition)
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
directory = directory,
design= ~ condition)
keep <- !sapply(ddsHTSeq@rowRanges@partitioning@NAMES, is_mt)
ddsHTSeq <- ddsHTSeq[keep,]
ddsHTSeq@assays@data$counts <- adjusted_mtx[,index]
keep <- rowSums(counts(ddsHTSeq)) >= 10
ddsHTSeq <- ddsHTSeq[keep,]
dds <- DESeq(ddsHTSeq)
res <- results(dds, filterFun=ihw)
vsd <- vst(dds, blind=FALSE)
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix(sampleDists)
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,clustering_distance_rows=sampleDists,clustering_distance_cols=sampleDists,
col=colors,filename ='../result/m1_m2_sample.png')
select <- res$padj <= 0.05 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1 &
!(rownames(res) %in% gene_f1_f2)
gene_m1_m2 <- rownames(res)[select]
go_bp <- enrichGO(gene = gene_m1_m2,
OrgDb = org.Mm.eg.db,
keyType = 'SYMBOL',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
write.csv(go_bp@result, file="../result/m1_m2_go_bp.csv")
png(file='../result/m1_m2_go_bp.png',width=1200, height=1000,res=150)
dotplot(go_bp,showCategory = 10,x='count')
dev.off()
gene_go_bp <- c()
for (s in go_bp[1:10]$geneID) {
g <- str_split(s,"/")[[1]]
gene_go_bp <- c(gene_go_bp,g)
}
select_go_bp <- rownames(res) %in% gene_go_bp
df <- as.data.frame(colData(dds)[c("condition")])
pheatmap(assay(vsd)[select_go_bp,],cluster_rows = FALSE,
color = colorRampPalette(c("blue","white","red"))(255),
clustering_distance_cols = "correlation",annotation_col = df,
filename ='../result/m1_m2_gene.png')
# f2 vs m2
index <- c(1,2,3,7,8,9)
sampleTable <- data.frame(sampleName = sampleName[index],
fileName = sampleFiles[index],
condition = sampleCondition[index])
sampleTable$condition <- factor(sampleTable$condition)
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
directory = directory,
design= ~ condition)
keep <- !sapply(ddsHTSeq@rowRanges@partitioning@NAMES, is_mt)
ddsHTSeq <- ddsHTSeq[keep,]
ddsHTSeq@assays@data$counts <- adjusted_mtx[,index]
keep <- rowSums(counts(ddsHTSeq)) >= 10
ddsHTSeq <- ddsHTSeq[keep,]
dds <- DESeq(ddsHTSeq)
res <- results(dds, filterFun=ihw)
vsd <- vst(dds, blind=FALSE)
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix(sampleDists)
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,clustering_distance_rows=sampleDists,clustering_distance_cols=sampleDists,
col=colors,filename ='../result/f2_m2_sample.png')
select <- res$padj <= 0.05 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1
gene_f2_m2 <- rownames(res)[select]
# f1 vs m1
index <- c(4,5,6,10,11,12)
sampleTable <- data.frame(sampleName = sampleName[index],
fileName = sampleFiles[index],
condition = sampleCondition[index])
sampleTable$condition <- factor(sampleTable$condition)
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
directory = directory,
design= ~ condition)
keep <- !sapply(ddsHTSeq@rowRanges@partitioning@NAMES, is_mt)
ddsHTSeq <- ddsHTSeq[keep,]
ddsHTSeq@assays@data$counts <- adjusted_mtx[,index]
keep <- rowSums(counts(ddsHTSeq)) >= 10
ddsHTSeq <- ddsHTSeq[keep,]
dds <- DESeq(ddsHTSeq)
res <- results(dds, filterFun=ihw)
vsd <- vst(dds, blind=FALSE)
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix(sampleDists)
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,clustering_distance_rows=sampleDists,clustering_distance_cols=sampleDists,
col=colors,filename ='../result/f1_m1_sample.png')
select <- res$padj <= 0.05 & !is.na(res$padj) & abs(res$log2FoldChange) >= 1 &
!(rownames(res) %in% gene_f2_m2)
gene_f1_m1 <- rownames(res)[select]
go_bp <- enrichGO(gene = gene_f1_m1,
OrgDb = org.Mm.eg.db,
keyType = 'SYMBOL',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
write.csv(go_bp@result, file="../result/f1_m1_go_bp.csv")
png(file='../result/f1_m1_go_bp.png',width=1200, height=1000,res=150)
dotplot(go_bp,showCategory = 10,x='count')
dev.off()
gene_go_bp <- c()
for (s in go_bp[1:10]$geneID) {
g <- str_split(s,"/")[[1]]
gene_go_bp <- c(gene_go_bp,g)
}
select_go_bp <- rownames(res) %in% gene_go_bp
df <- as.data.frame(colData(dds)[c("condition")])
pheatmap(assay(vsd)[select_go_bp,],cluster_rows = FALSE,
color = colorRampPalette(c("blue","white","red"))(255),
clustering_distance_cols = "correlation",annotation_col = df,
filename ='../result/f1_m1_gene.png')