Skip to content

Commit

Permalink
update ancestral code
Browse files Browse the repository at this point in the history
  • Loading branch information
KlausVigo committed Sep 9, 2024
1 parent 50be4d8 commit b49b151
Show file tree
Hide file tree
Showing 10 changed files with 239 additions and 137 deletions.
259 changes: 176 additions & 83 deletions R/ancestral.R

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions R/phyDat_conversion.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#' @aliases
#' as.phyDat.character as.phyDat.data.frame as.phyDat.matrix
#' as.MultipleAlignment as.MultipleAlignment.phyDat
#' as.StringSet as.StringSet.phyDat
#' acgt2ry phyDat2MultipleAlignment
#' @param data An object containing sequences.
#' @param x An object containing sequences.
Expand Down
28 changes: 16 additions & 12 deletions R/plotAnc.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,15 @@ getAncDF <- function(x){
#' example(NJ)
#' # generate node labels to ensure plotting will work
#' tree <- makeNodeLabel(tree)
#' anc.p <- ancestral.pars(tree, Laurasiatherian)
#' anc.p <- anc_pars(tree, Laurasiatherian)
#' # plot the third character
#' plotAnc(anc.p, 3, pos="topright")
#' plotSeqLogo(anc.p, node="Node10", 1, 25)
#'
#' data(chloroplast)
#' tree <- pratchet(chloroplast, maxit=10, trace=0)
#' tree <- makeNodeLabel(tree)
#' anc.ch <- ancestral.pars(tree, chloroplast)
#' anc.ch <- anc_pars(tree, chloroplast)
#' image(as.phyDat(anc.ch)[, 1:25])
#' plotAnc(anc.ch, 21, scheme="Ape_AA", pos="topleft")
#' plotAnc(anc.ch, 21, scheme="Clustal", pos="topleft")
Expand All @@ -88,7 +88,7 @@ plotAnc <- function(x, i = 1, type="phylogram", ..., col = NULL,
type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted",
"radial", "tidy"))
phylo_clado <- type %in% c("phylogram", "cladogram")
df <- getAncDF(x)
df <- as.data.frame(x)
data <- x$data
tree <- x$tree
subset <- df[,"Site"] == i
Expand Down Expand Up @@ -158,32 +158,36 @@ plotAnc <- function(x, i = 1, type="phylogram", ..., col = NULL,
my_ggseqlogo <-function (data, facet = "wrap", scales = "free_x", ncol = NULL,
nrow = NULL, start=NULL, end=NULL, ...)
{
x <- geom_logo(data = data, ...)
x[[2]] <- scale_x_continuous(limits = c(start-0.5, end+.5) ,
x <- ggseqlogo::geom_logo(data = data, ...)
x[[2]] <- ggplot2::scale_x_continuous(limits = c(start-0.5, end+.5) ,
breaks=pretty(seq(start, end)))
p <- ggplot() + x + theme_logo()
p <- ggplot2::ggplot() + x + ggseqlogo::theme_logo()
if (!"list" %in% class(data)) return(p)
facet <- match.arg(facet, c("grid", "wrap"))
if (facet == "grid") {
p <- p + facet_grid(~seq_group, scales = scales)
p <- p + ggplot2::facet_grid(~seq_group, scales = scales)
}
else if (facet == "wrap") {
p <- p + facet_wrap(~seq_group, scales = scales, nrow = nrow, ncol = ncol)
p <- p + ggplot2::facet_wrap(~seq_group, scales = scales, nrow = nrow,
ncol = ncol)
}
return(p)
}


#' @rdname plot.ancestral
#' @importFrom ggplot2 scale_x_continuous ggplot facet_grid facet_wrap
#' @importFrom ggseqlogo geom_logo theme_logo
## @importFrom ggplot2 scale_x_continuous ggplot facet_grid facet_wrap
## @importFrom ggseqlogo geom_logo theme_logo
#' @returns \code{plotSeqLogo} returns a ggplot object.
#' @export
plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT", ...){
plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10,
scheme="Ape_NT", ...){
stopifnot(inherits(x, "ancestral"))
chk <- requireNamespace("ggseqlogo", quietly = TRUE)
if (!chk) stop("package ggseqlogo needs to be not installed!\n")
type <- attr(x$data, "type")
tree <- x$tree
df <- getAncDF(x)
df <- as.data.frame(x)
nodes <- c(tree$tip.label, tree$node.label)
if(is.numeric(node)) node <- nodes[node]
subset <- df[,"Node"] == node
Expand Down
20 changes: 10 additions & 10 deletions inst/tinytest/test_ancestral.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ fit <- pml(tree, dna)
# dna tests differs from other data types as it may returns ambiguous data
# test ancestral generics
# test.ml1 <- ancestral.pml(fit, type = "ml")
test_ml <- ancestral.pml(fit, type = "ml")
test_mpr <- ancestral.pars(tree, dna, "MPR")
test_acctran <- ancestral.pars(tree, dna, "ACCTRAN")
#test_ml <- ancestral.pml(fit, type = "ml")
#test_mpr <- ancestral.pars(tree, dna, "MPR")
#test_acctran <- ancestral.pars(tree, dna, "ACCTRAN")

#expect_equal(as.character(test_ml), as.character(test_acctran))
#expect_equal(as.character(test_ml), as.character(test_mpr))
Expand All @@ -31,16 +31,16 @@ test_acctran <- ancestral.pars(tree, dna, "ACCTRAN")
data(Laurasiatherian)
fit <- pml_bb(Laurasiatherian[,1:100], "JC", rearrangement = "none",
control=pml.control(trace=0))
anc_ml <- ancestral.pml(fit)
anc_ml <- anc_pml(fit)
write.ancestral(anc_ml)
align <- read.phyDat("ancestral_align.fasta")
align <- read.phyDat("ancestral_align.fasta", format = "fasta")
tree <- read.tree("ancestral_tree.nwk")
df <- read.table("ancestral.state", header=TRUE)
anc_ml_disc <- ancestral(tree, align, df)
expect_equal(anc_ml[[1]], anc_ml_disc[[1]])
expect_equal(anc_ml[[2]][tree$tip.label], anc_ml_disc[[2]])
expect_equal(anc_ml[[3]], anc_ml_disc[[3]])
expect_equal(anc_ml[[4]], anc_ml_disc[[4]])
anc_ml_disc <- as.ancestral(tree, align, df)
expect_equal(anc_ml$tree, anc_ml_disc$tree)
expect_equal(anc_ml$data[tree$tip.label], anc_ml_disc$data)
#expect_equal(anc_ml[[3]], anc_ml_disc[[3]])
expect_equal(anc_ml$state, anc_ml_disc$state)
unlink(c("ancestral_align.fasta", "ancestral_tree.nwk", "ancestral.state"))


Expand Down
23 changes: 18 additions & 5 deletions man/ancestral.pml.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/as.phyDat.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/plot.ancestral.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/write.ancestral.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test_plot_ancestral.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dat <- matrix(c("a", "a",
dimnames = list(c("t1", "t2", "t3", "t4"), NULL))
dna <- phyDat(dat)
fit <- pml(tree, dna)
test_ml <- ancestral.pml(fit, type = "ml")
test_ml <- anc_pml(fit)


test_that("Pie_plots works", {
Expand Down
26 changes: 8 additions & 18 deletions vignettes/Ancestral.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -44,26 +44,18 @@ In _phangorn_ all the ancestral reconstructions for parsimony used to be based o
There is also an option "POSTORDER" which is only a one pass algotrithm, which we use for teaching purposes.
The function acctran (accelerated transformation) assigns edge length and internal nodes to the tree [@Swofford1987].
```{r ancestral_reconstruction}
anc.acctran <- ancestral.pars(tree, primates, "ACCTRAN")
anc.mpr <- ancestral.pars(tree, primates, "MPR")
anc.pars <- anc_pars(tree, primates)
```

The `plotSeqLogo` function is a wrapper around the from the _ggseqlogo_ function in the _ggseqlogo_ package [@ggseqlogo] and provides a simple way to show proportions of a nucleotides of ancestral states (see figure 1).

```{r seqLogo, fig.cap="Fig 1. Ancestral reconstruction for a node.", eval=TRUE}
#library(seqLogo)
#seqLogo( t(subset(anc.mpr, getRoot(tree), 1:20)[[1]]), ic.scale=FALSE)
plotSeqLogo(anc.mpr, node=getRoot(tree), 1, 20)
plotSeqLogo(anc.pars, node=getRoot(tree), 1, 20)
```
```{r MPR, fig.cap="Fig 2. Ancestral reconstruction using MPR."}
plotAnc(anc.mpr, 17)
plotAnc(anc.pars, 17)
title("MPR")
```
```{r ACCTRAN, fig.cap="Fig 3. Ancestral reconstruction using ACCTRAN."}
plotAnc(anc.acctran, 17)
title("ACCTRAN")
```


# Likelihood reconstructions

Expand All @@ -82,20 +74,18 @@ and the highest posterior probability ("bayes") criterion:
\[
P(x_r=A) = \frac{\pi_A L(x_r=A)}{\sum_{k \in \{A,C,G,T\}}\pi_k L(x_r=k)},
\]
where $L(x_r)$ is the joint probability of states at the tips and the state at the root $x_r$ and $\pi_i$ are the estimated base frequencies of state $i$.
Both methods agree if all states (base frequencies) have equal probabilities.
where $L(x_r)$ is the joint probability of states at the tips and the state at the root $x_r$ and $\pi_i$ are the estimated base frequencies of state $i$. Both methods agree if all states (base frequencies) have equal probabilities.
```{r ML_reconstruction}
anc.ml <- ancestral.pml(fit, "ml")
anc.bayes <- ancestral.pml(fit, "bayes")
anc.ml <- anc_pml(fit)
```
The differences of the two approaches for a specific site (17) are represented in the following figures.
```{r plotML, fig.cap="Fig 4. Ancestral reconstruction the using the maximum likelihood."}
plotAnc(anc.ml, 17)
title("ML")
```
```{r plotB, fig.cap="Fig 5. Ancestral reconstruction using (empirical) Bayes."}
plotAnc(anc.bayes, 17)
title("Bayes")
#plotAnc(anc.bayes, 17)
#title("Bayes")
```

# Fitting for discrete comparative data
Expand Down Expand Up @@ -143,7 +133,7 @@ all.equal(fit_SYM$logLik, fit_ace$loglik+log(1/3))


```{r SYM_reconstruction}
anc_SYM <- ancestral.pml(fit_SYM, "ml")
anc_SYM <- anc_pml(fit_SYM)
plotAnc(anc_SYM)
```

Expand Down

0 comments on commit b49b151

Please sign in to comment.