-
Notifications
You must be signed in to change notification settings - Fork 1
/
GSE52271.R
55 lines (45 loc) · 2.26 KB
/
GSE52271.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# For PCA Analysis to methylation 450K dataset
# for ips methylatin 450K analysis
GSE52271
library("GEOquery")
GSE56851 <- getGEO("GSE56851",destdir="/home/sguo/monod/data/geo")
save(GSE56851, file="GSE56851_matrix.Rdata")
library("GEOquery")
load("GSE56851_matrix.Rdata")
data <- as.data.frame(exprs(GSE56851[[1]]))
phen <- pData(phenoData(GSE56851[[1]]))
phen1<-as.character(phen$characteristics_ch1.2)
phen2<-as.character(phen$title)
phen2[grep("mechanically",phen$title)]<-"mechanically";
phen2[grep("enzymatically",phen$title)]<-"enzymatically";
PCAPlot<-function(data,pheno,output,multifigure=T){
pca <- prcomp(data,center=T,scale = F) # Here, input file: row is individual and column is variable
outputfile=paste(output,".pdf",sep="")
pdf(outputfile)
if(multifigure){
par(mfrow=c(2,2),mar=c(4,4,4,4))
}
plot((pca$sdev[1:10])^2,type="o",xaxt="n",ylab="Variances",xlab="Principle Components",col="red",lwd=2)
axis(1,at=0:10,labels=paste("PC",0:10,sep=""))
var<-c()
for(i in 1:length(pca$sdev)){var[i]<-sum((pca$sdev[1:i])^2)/sum((pca$sdev)^2)}
plot(var,ylab="total variance",xlab="number of principle components",lwd=2,type="l")
abline(h=0.8,col="grey",lty=2)
abline(v=which(var>0.8)[1],col="grey",lty=2)
scores <- data.frame(pheno, pca$x[,1:3])
col = as.numeric(as.factor(pheno))
plot(x=scores$PC1,y=scores$PC2, xlim=c(min(scores$PC1),max(scores$PC1)),ylim=c(min(scores$PC2),max(scores$PC2)),type="n",xlab="PC1",ylab="PC2")
for(i in 1:length(scores$PC1)){
points(scores$PC1[i],scores$PC2[i],pch=as.numeric(as.factor(pheno))[i],col=col[i],cex=0.8,lwd=2)
}
legend("bottomright",legend=names(table(pheno)),pch=1:length(table(pheno)),col=1:length(table(pheno)),bty="n")
plot(x=scores$PC1,y=scores$PC3, xlim=c(min(scores$PC1),max(scores$PC1)),ylim=c(min(scores$PC3),max(scores$PC3)),type="n",xlab="PC1",ylab="PC3")
for(i in 1:length(scores$PC1)){
points(scores$PC1[i],scores$PC3[i],pch=as.numeric(as.factor(pheno))[i],col=col[i],cex=0.9,lwd=2)
}
legend("bottomright",legend=names(table(pheno)),pch=1:length(table(pheno)),col=1:length(table(pheno)),bty="n")
dev.off()
}
data1=na.omit(data)
PCAPlot(t(data1),phen1,output="ES.pass.machnisum.phen1.pdf",multifigure=T)
PCAPlot(t(data1),phen2,output="ES.pass.machnisum.phen2.pdf",multifigure=T)