-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding emd file for simulation example - figure 1
- Loading branch information
1 parent
e8c8356
commit 126a06d
Showing
7 changed files
with
1,511 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,258 @@ | ||
--- | ||
title: "figure_1" | ||
output: | ||
html_document: | ||
code_folding: hide | ||
--- | ||
|
||
This is a simulation example that shows that confounders and batch effects impact reconstruction of gene co-expression networks. Using this example we also show that true underlying network structure can be reconstructed after principal component correction of gene expression data as described in the paper. | ||
|
||
This script generates Figure 1 in the paper -- "Addressing confounding artifacts in reconstruction of gene co-expression networks" | ||
|
||
```{r} | ||
rm(list = ls()) | ||
## load libraries | ||
#library(pcalg) | ||
library(mvtnorm) | ||
library(clusterGeneration) | ||
library(sva) | ||
#library(spacejam) | ||
library(gridExtra) | ||
library(gtable) | ||
library(glasso) | ||
library(huge) | ||
library(devtools) | ||
install_github('alyssafrazee/RSkittleBrewer') | ||
library(RSkittleBrewer) | ||
tropical = RSkittleBrewer('tropical') | ||
library(ggplot2) | ||
library(reshape2) | ||
library(cowplot) | ||
### Data: 5 features, 10 samples | ||
p=8 | ||
n=10 | ||
V=diag(1,nrow=p,ncol=p) | ||
V[1,3]=V[3,1]=.35 | ||
V[4,7]=V[7,4]=.3 | ||
V[1,8]=V[8,1]=.3 | ||
V.inv=solve(V) | ||
colnames(V)=rownames(V)=colnames(V.inv)=rownames(V.inv)=paste(1:8) | ||
A.generating=V | ||
for(i in 1:p){ | ||
for(j in 1:p){ | ||
if(abs(V[i,j])>0.05){ | ||
A.generating[i,j]=1 | ||
} | ||
if(abs(V[i,j])<=0.05){ | ||
A.generating[i,j]=0 | ||
} | ||
} | ||
} | ||
g.generating=graph.adjacency(A.generating,mode="undirected", diag=FALSE,add.colnames=TRUE) | ||
set.seed(4747) | ||
data=rmvnorm((1000*n),mean=rep(1,p),sigma=V) | ||
rownames(data)=1:(1000*n) | ||
colnames(data)=1:8 | ||
cor.data=cor(data) | ||
## Graph | ||
lambda=seq(1,0.01,length.out=200) | ||
glasso.unconfounded.lambda=huge(data,lambda,method="glasso") | ||
glasso.adjacency.unconfounded.lambda=glasso.unconfounded.lambda$path | ||
glasso.graph.unconfounded.lambda=list() | ||
for(i in 1:200){ | ||
glasso.graph.unconfounded.lambda[[i]]=graph.adjacency(glasso.adjacency.unconfounded.lambda[[i]],mode="undirected", diag=FALSE) | ||
} | ||
############ Confounding ############ | ||
data.confounded=data | ||
grp=rnorm(1000*n) | ||
for(i in 2:6){ | ||
data.confounded[,i] = data.confounded[,i] + 50*grp | ||
} | ||
rownames(data.confounded)=1:(1000*n) | ||
colnames(data.confounded)=1:8 | ||
cor.data.confounded=cor(data.confounded) | ||
colnames(cor.data.confounded)=rownames(cor.data.confounded)=1:8 | ||
colnames(data.confounded)=1:8 | ||
rownames(data.confounded)=1:10000 | ||
glasso.confounded=huge(data.confounded,lambda,method="glasso") | ||
glasso.adjacency.confounded=glasso.confounded$path | ||
glasso.graph.confounded=list() | ||
for(i in 1:200){ | ||
glasso.graph.confounded[[i]]=graph.adjacency(glasso.adjacency.confounded[[i]],mode="undirected", diag=FALSE) | ||
} | ||
############ Correct for Confounding (Residuals from PC) ############ | ||
mod=matrix(1,nrow=dim(data.confounded)[1],ncol=1) | ||
colnames(mod)="Intercept" | ||
nsv=num.sv(t(data.confounded),mod) | ||
ss=svd(data.confounded - colMeans(data.confounded)) | ||
grp.est=ss$u[,1] | ||
data.corrected=data.confounded*0 | ||
for(i in 1:8){ | ||
data.corrected[,i]=lm(data.confounded[,i] ~ grp.est)$residuals | ||
} | ||
rownames(data.corrected)=1:(1000*n) | ||
colnames(data.corrected)=1:8 | ||
cor.data.corrected=cor(data.corrected) | ||
colnames(cor.data.corrected)=rownames(cor.data.corrected)=1:8 | ||
glasso.corrected.lambda=huge(data.corrected,method="glasso",lambda) | ||
glasso.graph.corrected.lambda=list() | ||
for(i in 1:200){ | ||
glasso.graph.corrected.lambda[[i]]=graph.adjacency(glasso.corrected.lambda$path[[i]],mode="undirected", diag=FALSE) | ||
} | ||
## For a given gene, samples are standard normal | ||
z.data=data | ||
for(j in 1:8){ | ||
mean=mean(data[,j]) | ||
sd=sd(data[,j]) | ||
z.data[,j]=(data[,j]-mean)/sd | ||
} | ||
z.data.confounded=data.confounded | ||
for(j in 1:8){ | ||
mean=mean(data.confounded[,j]) | ||
sd=sd(data.confounded[,j]) | ||
z.data.confounded[,j]=(data.confounded[,j]-mean)/sd | ||
} | ||
z.data.corrected=data.corrected | ||
for(j in 1:8){ | ||
mean=mean(data.corrected[,j]) | ||
sd=sd(data.corrected[,j]) | ||
z.data.corrected[,j]=(data.corrected[,j]-mean)/sd | ||
} | ||
``` | ||
|
||
This part generates plots for each figure panel and saves it in a pdf | ||
|
||
```{r, echo=FALSE} | ||
### Plots | ||
col.expr=colorRampPalette(c("#d8b365", "#f5f5f5", "#5ab4ac")) | ||
col.corr=colorRampPalette(c("#ef8a62", "#f7f7f7", "#67a9cf")) | ||
at=seq(-1,1,length.out=50) | ||
layout=layout.circle(g.generating) | ||
## unconfounded data | ||
plot.z.data = melt(z.data[1:10,]) | ||
plot.z.data$Var1 <- factor(plot.z.data$Var1) | ||
plot.z.data$Var2 <- factor(plot.z.data$Var2) | ||
plot.cor.data = melt(cor.data) | ||
plot.cor.data$Var1 <- factor(plot.cor.data$Var1) | ||
plot.cor.data$Var2 <- factor(plot.cor.data$Var2) | ||
### Panel a | ||
panela <- ggplot(plot.z.data, aes(Var1, Var2))+ | ||
geom_tile(aes(fill = value))+ | ||
scale_fill_gradientn(colours = col.expr(50), limits = c(-2.5,2.5))+theme_bw() + xlab("Sample") + ylab("Gene") | ||
### Panel b | ||
panelb <- ggplot(plot.cor.data, aes(Var1, Var2))+ | ||
geom_tile(aes(fill = value))+ | ||
scale_fill_gradientn(colours = col.corr(50), | ||
limits = c(-1,1))+theme_bw() + xlab("Gene") + ylab("Gene") | ||
### Panel c | ||
pdf("panelc.pdf", height = 3, width = 3) | ||
plot(glasso.graph.unconfounded.lambda[[150]],layout=layout,vertex.label=1:8,vertex.size=50,edge.color="limegreen",edge.width=4, vertex.color="cornsilk") | ||
dev.off() | ||
## confounded | ||
plot.z.data.confounded = melt(z.data.confounded[1:10,]) | ||
plot.z.data.confounded$Var1 <- factor(plot.z.data.confounded$Var1) | ||
plot.z.data.confounded$Var2 <- factor(plot.z.data.confounded$Var2) | ||
plot.cor.data.confounded = melt(cor.data.confounded) | ||
plot.cor.data.confounded$Var1 <- factor(plot.cor.data.confounded$Var1) | ||
plot.cor.data.confounded$Var2 <- factor(plot.cor.data.confounded$Var2) | ||
### Panel d | ||
paneld <- ggplot(plot.z.data.confounded, aes(Var1, Var2))+ | ||
geom_tile(aes(fill = value))+ | ||
scale_fill_gradientn(colours = col.expr(50), limits = c(-2.5,2.5))+theme_bw() + xlab("Sample") + ylab("Gene") | ||
### Panel e | ||
panele <- ggplot(plot.cor.data.confounded, aes(Var1, Var2))+ | ||
geom_tile(aes(fill = value))+ | ||
scale_fill_gradientn(colours = col.corr(50), | ||
limits = c(-1,1))+theme_bw() + xlab("Gene") + ylab("Gene") | ||
### Panel f | ||
pdf("panelf.pdf", height = 3, width = 3) | ||
plot(glasso.graph.confounded[[150]],layout=layout,vertex.label=1:8,vertex.size=50,edge.color="limegreen",edge.width=4, vertex.color="cornsilk") | ||
dev.off() | ||
## corrected | ||
plot.z.data.corrected = melt(z.data.corrected[1:10,]) | ||
plot.z.data.corrected$Var1 <- factor(plot.z.data.corrected$Var1) | ||
plot.z.data.corrected$Var2 <- factor(plot.z.data.corrected$Var2) | ||
plot.cor.data.corrected = melt(cor.data.corrected) | ||
plot.cor.data.corrected$Var1 <- factor(plot.cor.data.corrected$Var1) | ||
plot.cor.data.corrected$Var2 <- factor(plot.cor.data.corrected$Var2) | ||
### Panel g | ||
panelg <- ggplot(plot.z.data.corrected, aes(Var1, Var2))+ | ||
geom_tile(aes(fill = value))+ | ||
scale_fill_gradientn(colours = col.expr(50), limits = c(-2.5,2.5))+ theme_bw() + xlab("Sample") + ylab("Gene") | ||
### Panel h | ||
panelh <- ggplot(plot.cor.data.corrected, aes(Var1, Var2))+ | ||
geom_tile(aes(fill = value))+ | ||
scale_fill_gradientn(colours = col.corr(50), | ||
limits = c(-1,1))+theme_bw() + xlab("Gene") + ylab("Gene") | ||
### Panel i | ||
pdf("paneli.pdf", height = 3, width = 3) | ||
plot(glasso.graph.corrected.lambda[[150]],layout=layout,vertex.label=1:8,vertex.size=50,edge.color="limegreen",edge.width=4, vertex.color="cornsilk") | ||
dev.off() | ||
fig1 <- plot_grid(panela + theme(legend.position="none"), | ||
panelb + theme(legend.position="none"), | ||
NULL, | ||
paneld + theme(legend.position="none"), | ||
panele + theme(legend.position="none"), | ||
NULL, | ||
panelg + theme(legend.position="none"), | ||
panelh + theme(legend.position="none"), | ||
NULL, | ||
labels=letters[1:9], ncol = 3) | ||
legenda <- get_legend(panela + theme(legend.direction = "horizontal", legend.position = "top")) | ||
legendb <- get_legend(panelb + theme(legend.direction = "horizontal", legend.position = "top")) | ||
legend <- plot_grid(legenda, legendb, NULL, ncol = 3) | ||
fig1 <- plot_grid( legend, fig1, rel_heights = c(.1, 1), ncol = 1) | ||
pdf("merged.pdf", height = 6, width = 6) | ||
print(fig1) | ||
dev.off() | ||
``` |
Oops, something went wrong.