generated from biobakery/Analysis-workflows-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extendeddatafig2b.R
executable file
·56 lines (49 loc) · 2.55 KB
/
extendeddatafig2b.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env Rscript
##################################################
#R program for creating Extended Data Figure 2b
##################################################
library(tibble)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggcorrplot)
library(pheatmap)
library(gplots)
library(plotrix)
#Read in the two files from halla
lean_association_oral<-read.table('all_associations_oral_lean.txt', header=T, sep = "")
nonlean_association_oral<-read.table('all_associations_oral_nonlean.txt', header=T, sep = "")
#join the datasets and get absolute difference and signs
lean_nonlean_merged_oral <- left_join(nonlean_association_oral,lean_association_oral,by=c("X_features","Y_features")) %>%
mutate(absdiff=abs(association.x-association.y)) %>% mutate(sign=association.x*association.y) %>%
filter(!is.na(absdiff)) %>%
mutate(group=case_when(association.x>=0 & association.y>=0 ~"+,+", association.x<=0 & association.y<=0 ~"-,-",
association.x>=0 & association.y<=0 ~"+,-", association.x<=0 & association.y>=0 ~"-,+")) %>%
mutate(groupinnum=case_when(association.x>=0 & association.y>=0 ~1, association.x<=0 & association.y<=0 ~2,
association.x>=0 & association.y<=0 ~3, association.x<=0 & association.y>=0 ~4))
#significance calculation
library(psych)
for(i in 1:nrow(lean_nonlean_merged_oral))
{
summary_test<-r.test(n = 174, r12 = lean_nonlean_merged_oral$association.x[i],
n2 = 37, r34 = lean_nonlean_merged_oral$association.y[i])
lean_nonlean_merged_oral$difference_p[i]<-as.numeric(summary_test$p)
}
lean_nonlean_merged_oral$fdr_qval<-p.adjust(lean_nonlean_merged_oral$difference_p,method="fdr")
to_spread<-lean_nonlean_merged_oral %>% select(X_features,Y_features,groupinnum)
to_cluster <- spread(to_spread, Y_features, groupinnum) %>% column_to_rownames("X_features") %>% as.matrix()
rowclust1 = hclust(dist(to_cluster))
colclust1 = hclust(dist(t(to_cluster)))
clustered_df = to_cluster[rowclust1$order,colclust1$order]
lean_nonlean_merged_oral_sig<-lean_nonlean_merged_oral %>% filter(difference_p<0.05)
#16*10.5
###Extended Data Figure 2b
#difference heatmap with all data
ggplot(lean_nonlean_merged_oral, aes(x = X_features, y = Y_features, color = group)) +
geom_count(aes(size = absdiff)) +
scale_size_area(max_size = 3) +
coord_flip() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 12),
axis.text.y = element_text(size = 12)) +
scale_y_discrete(limits = colnames(clustered_df)) +
scale_x_discrete(limits = rownames(clustered_df))