-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathtassel_create_tree.R
71 lines (57 loc) · 1.96 KB
/
tassel_create_tree.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
library(phangorn)
#change working directory
args <- commandArgs(trailingOnly=T)
# args[1] = "pel2b.filtered.csv"
# args[2] = "pel2b"
### READ DATA
# Read genotype data created tassel_filter_hapmap.R
genotype.seqs <- read.csv(args[1])
# remove tag seqs for tree generation
genotype <- genotype.seqs[,seq(2,length(genotype.seqs)-1)]
row.names(genotype) <- genotype.seqs$X # restore row names
### MAKE TREE
# Make tree, define colour groups
tree <- hclust(as.dist(1-cor(genotype,use = "pairwise.complete.obs")))
tree.groups <- cutree(tree, k=5)
write.csv(tree.groups, file=paste(args[2], "phylogroups.csv", sep="."))
# Assigns each phylogenetic group a colour
plot.col <- rainbow(max(as.numeric(tree.groups)))[as.numeric(tree.groups)]
### MAKE TREE FIGURE
pdf(file=paste(args[2], "tree.pdf", sep="."))
# Plot tree, including coloured boxes around different phylogenetic groups
plot(
tree,
cex = 0.7, #cex scales text by 0.5
#sub=paste("sample_cutoff=", sample.cutoff, "snp_cutoff=", snp.cutoff),
xlab=""
)
rect.hclust(
tree,
k=5,
border=rainbow(max(as.numeric(tree.groups))),
cluster=tree.groups
)
### MAKE BOOTSTRAPPED TREE
#make distance matrix and inital tree with upgma
#snp.cor.dist <- as.dist(1 - cor(genotype,use = "pairwise.complete.obs"))
snp.euc.dist <- dist(t(genotype)) # problems with missing data
initial.tree <- upgma(snp.euc.dist)
## Calculate boostrapped tree
bs <- list()
for (i in 1:100){
# resample data
bs.data <- sample(nrow(genotype),replace=T)
# get dist matrix of resampled data
# snp.cor.bs <- as.dist(1-cor(genotype[bs.data,],use = "pairwise.complete.obs"))
snp.cor.bs <- dist(t(genotype[bs.data,],use = "pairwise.complete.obs"))
# store tree of resampled data
bs[[i]] <- upgma(snp.cor.bs)
print(i)
}
## Make bootstrapped tree
bs.tree <- plotBS(initial.tree, bs)
## Print Bootstrapped tree
pdf(file=paste(args[2], "treeBS.pdf", sep="."))
plot(initial.tree, cex=0.5)
nodelabels(bs.tree$node.label,frame="none", cex=0.5)
dev.off()