-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEdgelist_Creation.R
145 lines (117 loc) · 6.57 KB
/
Edgelist_Creation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
######Edge creation######
#input is "results" from Pairwise_Cooccurrence_Calcs
library(igraph)
library(fdrtool)
library(ggplot2)
co_occur_pairs<-function(dataset){
final.results<-data.frame()
rhos<-c(-.75,-.5,.5,.75)
trts<-as.vector(unique(dataset$trt))
for(t in 1:length(trts)){
#t<-1
dataset_trt<-subset(dataset, trt==trts[t])
dataset_trt_no0<-subset(dataset_trt, ab1 > 2 & ab2 > 2)#Ryan had this at 0
dataset_trt_no0$pairs<-paste(dataset_trt_no0$taxa1,dataset_trt_no0$taxa2)
for(r in 1:4){
#r<-5
if(rhos[r] < 0){temp<-subset(dataset_trt_no0, rho <= rhos[r])}
if(rhos[r] > 0){temp<-subset(dataset_trt_no0, rho >= rhos[r])}
if(dim(temp)[1]>1){
#temp.graph<-simplify(graph.edgelist(as.matrix(temp[,c(2,3)]),directed=FALSE))#Emily: this reorders the edges and the list of species, so it is not the same as temp. thus below the merge misses some species because the name is first on one list and second on the other. the only thing this does is remove loops and multiple edges, but there should be any of these things in the data set to begin with, so I'm skipping this.
#edge_list<-data.frame(get.edgelist(temp.graph,names=TRUE))
#edge_list$pairs<-paste(edge_list$X1,edge_list$X2)
#edge_list_pvals<-merge(edge_list,dataset_trt_no0,by="pairs",sort=FALSE )
temp$rho_cut<-rhos[r]
#edge_list_pvals$trt<-trts[t]
#temp$qval1<-fdrtool(temp$p.value,statistic="pvalue",plot=FALSE,verbose=FALSE)$qval
temp$qval2<-p.adjust(temp$p.value, method="fdr")#using a different method for p-value adjustment. the fdrtool wasn't working when I had a lot of p values that were significant and none around 1
temp<-cbind(pairs=temp$pairs,temp[,1:7],temp[,9:10])
final.results<-rbind(final.results,temp) }
}
print(t/length(trts))
}
return(final.results)
}
edge_lists<-co_occur_pairs(results)
dim(subset(edge_lists,rho>.5))
head(edge_lists)
######Edge creation II - with a p cutoff rather than a rho cutoff######
#
co_occur_pairs_all<-function(dataset){
final.results<-data.frame()
trts<-as.vector(unique(dataset$trt))
for(t in 1:length(trts)){
dataset_trt<-subset(dataset, trt==trts[t])
dataset_trt_no0<-subset(dataset_trt, ab1 > 0 & ab2 > 0)#Ryan had this at 0
dataset_trt_no0$pairs<-paste(dataset_trt_no0$taxa1,dataset_trt_no0$taxa2)
temp<-dataset_trt_no0
if(dim(temp)[1]>1){
temp$qval1<-fdrtool(temp$spearmanp.value,statistic="pvalue",plot=FALSE,verbose=FALSE)$qval
temp$qval2<-p.adjust(temp$spearmanp.value, method="fdr")#using a different method for p-value adjustment. the fdrtool wasn't working when I had a lot of p values that were significant and none around 1
temp<-cbind(pairs=temp$pairs,temp[,1:11],temp[,13:14])
#temp<-cbind(pairs=temp$pairs,temp[,1:7],temp[,9:10])
final.results<-rbind(final.results,temp) }
print(t/length(trts))
}
return(final.results)
}
edge_listsno4orlower<-co_occur_pairs_all(resultsno4orlower)#spearman, p values are bad
edge_listsno4orlowerK<-co_occur_pairs_all(resultsno4orlowerK)#kelmann, pvalues good
edge_listsno4abunorlowerK<-co_occur_pairs_all(resultsno4abunorlowerK)
edge_listsno4orlowerK$qval3<-p.adjust(edge_listsno4orlowerK$p.value,method="fdr")
edge_listsKSno4<-cbind(pairs=paste(resultsKSno4$taxa1,resultsKSno4$taxa2),resultsKSno4)
edge_listsKSno4$qval<-p.adjust(edge_listsKSno4$spearmanp.value,method="fdr")
edge_listsKSno4b<-subset(edge_listsKSno4,spearmanrho>0)
edge_listsKSno5<-cbind(pairs=paste(resultsKSno5$taxa1,resultsKSno5$taxa2),resultsKSno5)
edge_listsKSno5$qval<-p.adjust(edge_listsKSno5$spearmanp.value,method="fdr")
edge_listsKSno5b<-subset(edge_listsKSno5,spearmanrho>0)
edge_listsKSno3<-cbind(pairs=paste(resultsKSno3$taxa1,resultsKSno3$taxa2),resultsKSno3)
edge_listsKSno3$qval<-p.adjust(edge_listsKSno3$spearmanp.value,method="fdr")
edge_listsKSno3b<-subset(edge_listsKSno3,spearmanrho>0)
edge_listsKS32no2<-cbind(pairs=paste(resultsKS32no2$taxa1,resultsKS32no2$taxa2),resultsKS32no2)
edge_listsKS32no2$qval<-p.adjust(edge_listsKS32no2$spearmanp.value,method="fdr")
edge_listsKS32no2b<-subset(edge_listsKS32no2,spearmanrho>0)
edge_listsKS32p<-cbind(pairs=paste(resultsKS32p$taxa1,resultsKS32p$taxa2),resultsKS32p)#the frequency cutoff was >2 *I think*, this was done within the loop so not here
edge_listsKS32p$qval<-p.adjust(edge_listsKS32p$spearmanp.value,method="fdr")
edge_listsKS32pb<-subset(edge_listsKS32p,spearmanrho>0)
head(edge_listsKS32pb)
######Final bacteria, euk, plant network#####
edge_listsBEP<-cbind(pairs=paste(resultsBEP$taxa1,resultsBEP$taxa2),resultsBEP)
edge_listsBEP$qval<-p.adjust(edge_listsBEP$spearmanp.value,method="fdr")
edge_listsBEPb<-subset(edge_listsBEP,ab1freq>10&ab2freq>10)
#edge_listsBEPb<-subset(edge_listsBEP,ab1freq>5&ab2freq>5)
edge_listsBEPb$qval<-p.adjust(edge_listsBEPb$spearmanp.value,method="fdr")
edge_listsBEPc<-subset(edge_listsBEPb,spearmanrho>0)
dim(edge_listsBEPc)
min(edge_listsBEPc$ab2freq)
dim(subset(edge_listsBEPc,trt=="hi"))
#dim(comm16S.spelo2)[2]+dim(commEuk.spelo2)[2]
#subset euks and proks out of the edge_listsBEP file
head(names16Sb)
head(namesEukb)
plantlabels
edge_listsBEP16S<-edge_listsBEP[which(edge_listsBEP$taxa1%in%c(names16Sb$otuxy,as.character(plantlabels$otuxy))&edge_listsBEP$taxa2%in%c(names16Sb$otuxy,as.character(plantlabels$otuxy))),]
head(edge_listsBEP16Sb)
dim(edge_listsBEP16S)
edge_listsBEP16Sb<-subset(edge_listsBEP16S,ab1freq>10&ab2freq>10)
edge_listsBEP16Sb$qval<-p.adjust(edge_listsBEP16Sb$spearmanp.value,method="fdr")
edge_listsBEP16Sc<-subset(edge_listsBEP16Sb,spearmanrho>0)
edge_listsBEPEuk<-edge_listsBEP[which(edge_listsBEP$taxa1%in%c(namesEukb$otuxy,as.character(plantlabels$otuxy))&edge_listsBEP$taxa2%in%c(namesEukb$otuxy,as.character(plantlabels$otuxy))),]
head(edge_listsBEPEuk)
dim(edge_listsBEPEuk)
edge_listsBEPEukb<-subset(edge_listsBEPEuk,ab1freq>6&ab2freq>6)
edge_listsBEPEukb$qval<-p.adjust(edge_listsBEPEukb$spearmanp.value,method="fdr")
edge_listsBEPEukc<-subset(edge_listsBEPEukb,spearmanrho>0)
#####Nematode correlations######
edge_listsN<-cbind(pairs=paste(resultsN$taxa1,resultsN$taxa2),resultsN)
edge_listsNb<-subset(edge_listsN,ab1freq>6&ab2freq>6)
edge_listsNb$qval<-p.adjust(edge_listsNb$spearmanp.value,method="fdr")
edge_listsNc<-subset(edge_listsNb,spearmanrho>0)
dim(edge_listsNc)
#subset euks out from nematodes
edge_listsNEuk<-edge_listsN[which(edge_listsN$taxa2%in%c(namesEukb$otuxy)),]
head(edge_listsNEuk)
dim(edge_listsNEuk)
edge_listsNEukb<-subset(edge_listsNEuk,ab1freq>3&ab2freq>3)
edge_listsNEukb$qval<-p.adjust(edge_listsNEukb$spearmanp.value,method="fdr")
edge_listsNEukc<-subset(edge_listsNEukb,spearmanrho>0)