forked from Danning16/MaizeMicrobiome2022
-
Notifications
You must be signed in to change notification settings - Fork 0
/
asv_downstream_analysis.R
182 lines (160 loc) · 8 KB
/
asv_downstream_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
library(ggplot2)
library(RColorBrewer)
library(tidyverse)
library(DESeq2)
library(vegan)
library(phyloseq)
library(dplyr)
library(reshape2)
library(biomformat)
library(qiime2R)
library(FSA)
library(lme4)
library(rcompanion)
library(viridis)
mytheme = theme_bw() +
theme(plot.margin = unit(c(1,1,1,1), "cm"), plot.title = element_text(size = 10),
panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
axis.text = element_text(colour = "black", size = 8))
setwd("~/genotypes129")
#-----------------------------------------------------------------------#
# import to phyloseq #
#-----------------------------------------------------------------------#
# read asv biom table
biom_table = read_biom("./data/ITS/feature-table.biom")
rm(biom_table)
feature.table = as.matrix(biom_data(biom_table))
dim(feature.table)
# convert feature ID to ASV
featureID2asv = cbind("FeatureID"=rownames(feature.table), "ASV"=paste("fASV", seq(1, nrow(feature.table)), sep = ""))
write.table(featureID2asv, "./intermediate_data/ITS/pre-analysis/fungi_feature_ID_to_ASV_file.txt", row.names = F, sep = "\t", col.names = T)
asv.table = feature.table
rownames(asv.table) = featureID2asv[,2]
# read taxonomy
taxonomy = read.table("./data/ITS/taxonomy.tsv", sep = "\t", header = T)
taxa.table = taxonomy %>%
separate(Taxon, c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"), sep = ";") %>%
mutate(Kingdom = gsub("k__", "", Kingdom),
Phylum = gsub("p__", "", Phylum),
Class = gsub("c__", "", Class),
Order = gsub("o__", "", Order),
Family = gsub("f__", "", Family),
Genus = gsub("g__", "", Genus),
Species = gsub("s__", "", Species))
table(rownames(feature.table) == taxa.table$Feature.ID)
rownames(taxa.table) = featureID2asv[, 2]
taxa.table = taxa.table[, -1]
taxa.table = as.matrix(taxa.table)
# read metadata file
md = read.csv("./data/16S/samples_metadata_new.csv", header = T)
# create phyloseq object
OTU.tab = otu_table(asv.table, taxa_are_rows = TRUE)
TAX.tab = tax_table(taxa.table)
md = md[match(colnames(OTU.tab), md$Sample.ID), ]
rownames(md) = md$Sample.ID
sample.tab = sample_data(md)
table(sample.tab$Sample.ID == colnames(OTU.tab))
ps = phyloseq(OTU.tab, TAX.tab, sample.tab)
# read phylogenetic tree file
rooted.tree = qza_to_phyloseq(tree = "./data/ITS/rooted-tree.qza")
table(rooted.tree$tip.label %in% rownames(feature.table))
# change tip label to ASV number
new_tiplabel = featureID2asv[match(rooted.tree$tip.label, featureID2asv[,1]), 1]
table(rooted.tree$tip.label == new_tiplabel)
new_tiplabel = featureID2asv[match(rooted.tree$tip.label, featureID2asv[,1]), 2]
rooted.tree$tip.label = new_tiplabel
ps.tree = merge_phyloseq(ps, phy_tree(rooted.tree))
ps.tree
saveRDS(ps.tree, "./intermediate_data/ITS/pre-analysis/raw_fungi_asv_table_phyloseq.RDS")
table(tax_table(ps.tree)[, "Phylum"]) # remove unidentified asv at phylum level
ps.tree.prefiltered = subset_taxa(ps.tree, Phylum!=' unidentified')
# function for filter taxa those express at least K reads in M*3 samples
expressKinM = function(K, M, otuTab){
keep.taxa = c()
for (i in 1:(ncol(otuTab)-1)) {
print(i)
if(sum(table(otuTab[otuTab[, i]>=K, ]$rep)>=3) >= M){
keep.taxa = c(keep.taxa, colnames(otuTab)[i])
}
}
return(keep.taxa)
}
# filter lowly-expressed taxa
tmp.otu = as.data.frame(cbind(t(otu_table(ps.tree.prefiltered)), "rep"=as.factor(rep(1:1056, each=3))))
keep.tax = expressKinM(10, 2, tmp.otu)
ps.tree.filtered.abund = subset_taxa(ps.tree.prefiltered, taxa_names(ps.tree.prefiltered)%in%keep.tax)
range(sample_sums(ps.tree.filtered.abund))
#--------------------------------------------------------------------#
# alpha diversity #
#--------------------------------------------------------------------#
setwd("~/genotypes129/results/ITS/alpha_div")
# plot rarefaction curve
rarecurve(t(otu_table(ps.tree.filtered.abund)), step=100, label = F, sample = c(1000,8000))
# rarefaction
ps.rare = rarefy_even_depth(ps.tree.filtered.abund, 1000, rngseed = 1625458, replace = FALSE)
# generate a data.frame with alpha diversity measures
adiv = data.frame(
"Observed" = phyloseq::estimate_richness(ps.rare, measures = "Observed"),
"Shannon" = phyloseq::estimate_richness(ps.rare, measures = "Shannon"))
adiv = cbind(adiv, sample_data(ps.rare))
write.table(adiv, file = "~/genotypes129/results/ITS/alpha_div/rarefied1000reads/alpha_diversity_table_1000.txt", sep = "\t", quote = F)
# box plot of alpha diversity
alphaDivPlot = function(alphaDivDF, plotFator1, plotFator2, levelsOrder, divIndex){
print(divIndex)
alphaDivDF$CompTre = paste(alphaDivDF[,plotFator1], alphaDivDF[,plotFator2], sep = "_")
#f <- reformulate(divIndex, response="CompTre")
f = as.formula(paste(divIndex, "CompTre", sep="~"))
print(f)
# Kruskal-Wallis test
krus.res = kruskal.test(f, data = alphaDivDF)
dunn.res = dunnTest(f, data = alphaDivDF, method = "bh")
dunn.res$res$Compartment = gsub("_.*$", "", dunn.res$res$Comparison)
#comOrd = c("Soil", "Rhizosphere", "Root")
dunn.res$res = dunn.res$res[order(match(dunn.res$res$Compartment, levelsOrder)), ]
write.csv(dunn.res$res, paste(divIndex, "dunnTest_results.csv", sep = "_"), quote = F, row.names = F)
# convert adjusted p value to letters
label.dunn = cldList(P.adj ~ Comparison, data = dunn.res$res, threshold = 0.05)[, 1:2]
label.dunn[, plotFator1] = gsub("_.*$", "", label.dunn$Group)
label.dunn[, plotFator2] = sub("^[^_]*_", "", label.dunn$Group)
label.dunn[, plotFator1] = factor(label.dunn[, plotFator1], levels = levelsOrder)
print(label.dunn)
alphaDivDF[, plotFator1] = factor(alphaDivDF[, plotFator1], levels = levelsOrder)
#print(head(alphaDivDF))
# box plot
temp = ggplot(data= alphaDivDF, aes_string(x = plotFator1, y = divIndex, color=plotFator2)) +
geom_boxplot(outlier.colour = NA) +
geom_point(position=position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
aes_string(color=plotFator2), alpha=0.6, size=1) +
scale_color_viridis(discrete=TRUE) +
geom_text(data = label.dunn, aes(y=rep(max(alphaDivDF[, divIndex])*1.1, nrow(label.dunn)),
label=Letter), position = position_dodge(width = 1)) +
ylim(NA, max(alphaDivDF[, divIndex])*1.2) +
labs(
x = plotFator1,
y = divIndex,
title = paste(paste("Kruskal-Wallis chi-squared", signif(krus.res$statistic, 5), sep = "="),
paste("df", krus.res$parameter, sep = "="),
paste("p value", signif(krus.res$p.value,3), sep = "="), sep = " ")
) + mytheme
ggsave(temp, file = paste(divIndex, ".pdf", sep = ""), width = 16, height = 10, units = "cm")
}
alphaDivPlot(adiv, "Compartment", "Treatment", c("Soil", "Rhizosphere", "Root"), "Shannon")
#---------------------------------------------------------------------#
# beta diversity #
#---------------------------------------------------------------------#
# DESeq2 Normalization
bac.deseq = phyloseq_to_deseq2(ps.tree.filtered.abund, ~Block+Compartment)
bac.deseq = estimateSizeFactors(bac.deseq, type="poscount")
bac.vst = varianceStabilizingTransformation(bac.deseq, blind=F)
# REMOVE batch effects
bac.vst$batch =as.factor(paste(bac.vst$Harvest.batch, bac.vst$Treatment.batch, sep = "_"))
bac.vst$plotBatch = as.factor(paste(bac.vst$Block, bac.vst$Plot, sep = "_"))
mm <- model.matrix(~Compartment, colData(bac.vst))
mat <- limma::removeBatchEffect(assay(bac.vst), batch=bac.vst$plotBatch,
batch2 = bac.vst$batch, design=mm)
assay(bac.vst) <- mat
bac.vst.norm = ps.tree.filtered.abund
otu_table(bac.vst.norm) = otu_table(assay(bac.vst), taxa_are_rows = T)
# save normalized ASV file after removing batch effects
saveRDS(bac.vst.norm, "./ps.abund.vst.norm.rmBatch.RDS")