From a29255d423c0c8d98965cab09484e4feb868a1f5 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 27 Apr 2023 15:40:51 +0200 Subject: [PATCH 001/216] allow DNAbin/AAbin objects, faster pratchet --- R/bab.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/bab.R b/R/bab.R index ff8b57b3..63d1f4d3 100644 --- a/R/bab.R +++ b/R/bab.R @@ -167,8 +167,10 @@ pBound <- function(x, UB, LB) { #' #' @export bab bab <- function(data, tree = NULL, trace = 1, ...) { + if(inherits(data, "DNAbin") | inherits(data, "AAbin")) data <- as.phyDat(data) + if (!inherits(data, "phyDat")) stop("data must be of class phyDat") if (!is.null(tree)) data <- subset(data, tree$tip.label) - pBound <- TRUE + pBound <- FALSE nTips <- length(data) if (nTips < 4) return(stree(nTips, tip.label = names(data))) @@ -195,7 +197,7 @@ bab <- function(data, tree = NULL, trace = 1, ...) { o <- order(attr(data, "weight"), decreasing = TRUE) data <- subset(data, select = o, site.pattern=TRUE) - tree <- pratchet(data, start = tree, trace = trace - 1, ...) + tree <- pratchet(data, start = tree, trace = trace - 1, maxit=10, ...) data <- subset(data, tree$tip.label) nr <- as.integer(attr(data, "nr")) From 5b2474db0259b40963d70bdc4ce20698e35ae294 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 27 Apr 2023 16:25:29 +0200 Subject: [PATCH 002/216] simplify code --- R/parsimony.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/parsimony.R b/R/parsimony.R index 1882bf1a..02deb276 100644 --- a/R/parsimony.R +++ b/R/parsimony.R @@ -200,10 +200,9 @@ upperBound <- function(x, cost = NULL) { #' @rdname CI #' @export CI <- function(tree, data, cost = NULL, sitewise = FALSE) { - if (sitewise) pscore <- sankoff(tree, data, cost = cost, site = "site") - else pscore <- sankoff(tree, data, cost = cost) - weight <- attr(data, "weight") data <- subset(data, tree$tip.label) + pscore <- sankoff(tree, data, cost, ifelse(sitewise, "site", "pscore")) + weight <- attr(data, "weight") m <- lowerBound(data, cost = cost) if (sitewise) { return( (m / pscore)[attr(data, "index")]) @@ -215,9 +214,8 @@ CI <- function(tree, data, cost = NULL, sitewise = FALSE) { #' @rdname CI #' @export RI <- function(tree, data, cost = NULL, sitewise = FALSE) { - if (sitewise) pscore <- sankoff(tree, data, cost = cost, site = "site") - else pscore <- sankoff(tree, data, cost = cost) data <- subset(data, tree$tip.label) + pscore <- sankoff(tree, data, cost, ifelse(sitewise, "site", "pscore")) weight <- attr(data, "weight") m <- lowerBound(data, cost = cost) g <- upperBound(data, cost = cost) From bef9f53040bf084eb4419d5aeda42ec81920e9f6 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 5 May 2023 08:03:43 +0200 Subject: [PATCH 003/216] add tip.dates to allow nicer labeling with tip dated phylogenies --- R/Densi.R | 35 ++++++++++++++++++++++++++--------- man/densiTree.Rd | 6 ++++-- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/R/Densi.R b/R/Densi.R index 5b4b2a7e..525bf9c6 100644 --- a/R/Densi.R +++ b/R/Densi.R @@ -91,10 +91,11 @@ add_tiplabels <- function(xy, tip.label, direction, adj, font, srt = 0, cex = 1, #' @param scale.bar a logical specifying whether add scale.bar to the plot. #' @param jitter allows to shift trees. a list with two arguments: the amount of #' jitter and random or equally spaced (see details below) +#' @param tip.dates A named vector of sampling times associated with the tips. #' @param \dots further arguments to be passed to plot. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{plot.phylo}}, \code{\link{plot.networx}}, -#' \code{\link{jitter}} +#' \code{\link{jitter}}, \code{\link{rtt}} #' @references densiTree is inspired from the great #' \href{https://www.cs.auckland.ac.nz/~remco/DensiTree/}{DensiTree} program of #' Remco Bouckaert. @@ -128,7 +129,8 @@ densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), scaleX = FALSE, col = 1, width = 1, lty = 1, cex = .8, font = 3, tip.color = 1, adj = 0, srt = 0, underscore = FALSE, label.offset = 0, scale.bar = TRUE, - jitter = list(amount = 0, random = TRUE), ...) { + jitter = list(amount = 0, random = TRUE), tip.dates=NULL, + ...) { if (!inherits(x, "multiPhylo")) stop("x must be of class multiPhylo") if (is.character(consensus)) { consensus <- stree(length(consensus), tip.label = consensus) @@ -159,10 +161,25 @@ densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), consensus <- sort_tips(consensus) consensus <- reorder(consensus, "postorder") + + at <- NULL maxBT <- max(getAges(x)) - if (scaleX) maxBT <- 1.0 - label <- rev(pretty(c(maxBT, 0))) - maxBT <- max(label) + if(!is.null(tip.dates)){ + root_time <- max(tip.dates) - maxBT + label <- pretty(c(root_time, max(tip.dates)), min.n = 3) + label <- label[label < max(tip.dates)] + maxBT <- max(maxBT, max(tip.dates) - min(label)) + at <- 1 - (max(tip.dates) - label) / maxBT + if(direction=="leftwards" || direction=="downwards") at <- at + 1- max(at) + scaleX=FALSE + } + else { + if (scaleX) maxBT <- 1.0 + label <- rev(pretty(c(maxBT, 0))) + maxBT <- max(label, maxBT) + at <- seq(0, 1.0, length.out = length(label)) + } + xy <- plotPhyloCoor(consensus, direction = direction, ...) yy <- xy[, 2] @@ -174,25 +191,25 @@ densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), if (direction == "rightwards") { plot.default(0, type = "n", xlim = c(0, 1.0 + sw), ylim = c(0, nTip + 1), xlab = "", ylab = "", axes = FALSE, ...) - if (scale.bar) axis(side = 1, at = seq(0, 1.0, length.out = length(label)), + if (scale.bar) axis(side = 1, at = at, #seq(0, 1.0, length.out = length(label)), labels = label) } if (direction == "leftwards") { plot.default(0, type = "n", xlim = c(0 - sw, 1.0), ylim = c(0, nTip + 1), xlab = "", ylab = "", axes = FALSE, ...) - if (scale.bar) axis(side = 1, at = seq(0, 1.0, length.out = length(label)), + if (scale.bar) axis(side = 1, at = at, #seq(0, 1.0, length.out = length(label)), labels = rev(label)) } if (direction == "downwards") { plot.default(0, type = "n", xlim = c(0, nTip + 1), ylim = c(0 - sw, 1.0), xlab = "", ylab = "", axes = FALSE, ...) - if (scale.bar) axis(side = 2, at = seq(0, 1.0, length.out = length(label)), + if (scale.bar) axis(side = 2, at = at, #seq(0, 1.0, length.out = length(label)), labels = rev(label)) } if (direction == "upwards") { plot.default(0, type = "n", xlim = c(0, nTip + 1), ylim = c(0, 1.0 + sw), xlab = "", ylab = "", axes = FALSE, ...) - if (scale.bar) axis(side = 2, at = seq(0, 1.0, length.out = length(label)), + if (scale.bar) axis(side = 2, at = at, #seq(0, 1.0, length.out = length(label)), labels = label) } tip_labels <- consensus$tip.label diff --git a/man/densiTree.Rd b/man/densiTree.Rd index efd0844c..ddf281c3 100644 --- a/man/densiTree.Rd +++ b/man/densiTree.Rd @@ -8,7 +8,7 @@ densiTree(x, type = "cladogram", alpha = 1/length(x), consensus = NULL, direction = "rightwards", optim = FALSE, scaleX = FALSE, col = 1, width = 1, lty = 1, cex = 0.8, font = 3, tip.color = 1, adj = 0, srt = 0, underscore = FALSE, label.offset = 0, scale.bar = TRUE, - jitter = list(amount = 0, random = TRUE), ...) + jitter = list(amount = 0, random = TRUE), tip.dates = NULL, ...) } \arguments{ \item{x}{an object of class \code{multiPhylo}.} @@ -61,6 +61,8 @@ of the phylogeny and their corresponding labels.} \item{jitter}{allows to shift trees. a list with two arguments: the amount of jitter and random or equally spaced (see details below)} +\item{tip.dates}{A named vector of sampling times associated with the tips.} + \item{\dots}{further arguments to be passed to plot.} } \description{ @@ -108,7 +110,7 @@ trees \emph{Bioinformatics}, \bold{26 (10)}, 1372-1373. } \seealso{ \code{\link{plot.phylo}}, \code{\link{plot.networx}}, -\code{\link{jitter}} +\code{\link{jitter}}, \code{\link{rtt}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} From 6a88c1ae2886bf9fcfe2cb21fb5645dcf0beb76f Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 5 May 2023 08:03:58 +0200 Subject: [PATCH 004/216] clean up --- R/phylo.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/phylo.R b/R/phylo.R index cbccaff2..f0ed3511 100644 --- a/R/phylo.R +++ b/R/phylo.R @@ -657,7 +657,7 @@ guess_model <- function(x){ } - +# needs to go in phangorn extra optEdgeMulti <- function(object, control = pml.control(epsilon = 1e-8, maxit = 10, trace = 1, tau = 1e-8), ...) { tree <- object$tree @@ -989,7 +989,7 @@ pml.fit4 <- function(tree, data, bf = rep(1 / length(levels), length(levels)), if (ASC) { ind <- seq_len(nc) p0 <- sum(exp(siteLik[ind])) - if(is.nan(log(1 - p0))) browser() + if(p0 >= 1) stop("Error Mkv") loglik <- loglik - sum(weight) * log(1 - p0) } if (!site) return(loglik) From adafd66e05d94c96e66b8d3c566e1de03a2b2ebb Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 5 May 2023 19:19:30 +0200 Subject: [PATCH 005/216] root tree if necessary --- R/pml_bb.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/pml_bb.R b/R/pml_bb.R index 4c1992c8..ef3bfe83 100644 --- a/R/pml_bb.R +++ b/R/pml_bb.R @@ -89,6 +89,12 @@ pml_bb <- function(x, model=NULL, rearrangement="stochastic", if(method=="tipdated" && !is.null(attr(start, "rate"))) fit <- update(fit, rate=attr(start, "rate")) } + if(optRooted && !is.rooted(fit$tree)){ + start <- candidate_tree(fit$data, method=method, + tip.dates = tip.dates, eps=1e-7) + fit <- update(fit, tree=start) + if(method=="tipdated") fit <- update(fit, rate=attr(start, "rate")) + } type <- attr(fit$data, "type") para <- split_model(model, type) if(type=="AA" && para$optFreq){ From 81c2994d5b2096e12e820816ef48ec8b575add65 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 9 May 2023 17:34:38 +0200 Subject: [PATCH 006/216] add reference --- vignettes/phangorn.bib | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/vignettes/phangorn.bib b/vignettes/phangorn.bib index ff511390..4a0fff4e 100644 --- a/vignettes/phangorn.bib +++ b/vignettes/phangorn.bib @@ -838,6 +838,19 @@ @Article{ Mathews2010 } +@article {Pfingstl2023, + author = "Tobias Pfingstl and Iris Bardel-Kahr and Klaus Schliep", + title = "One step closer but still far from solving the puzzle – The phylogeny of marine associated mites (Acari, Oribatida, Ameronothroidea) inferred from morphological and molecular genetic data", + journal = "Contributions to Zoology", + year = "2023", + publisher = "Brill", + address = "Leiden, The Netherlands", + doi = "https://doi.org/10.1163/18759866-bja10043", + pages= "1 - 33" +} + + + @article {Schliep2017, author = "Schliep, Klaus and Potts, Alastair J. and Morrison, David A. and Grimm, Guido W.", title = "Intertwining phylogenetic trees and networks", From 57fb66130d4ef03e93637284dc01a59feff532f8 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 9 May 2023 17:55:11 +0200 Subject: [PATCH 007/216] use ape functions --- R/draw_CI.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/draw_CI.R b/R/draw_CI.R index 2ba3163d..096a9811 100644 --- a/R/draw_CI.R +++ b/R/draw_CI.R @@ -6,7 +6,7 @@ edge_length_matrix <- function(tree, trees, rooted=TRUE){ tree <- unroot(tree) } else{ - if(!is_rooted(tree) || any(!is_rooted(trees))) stop("All trees need to be rooted!") + if(!is.rooted(tree) || any(!is.rooted(trees))) stop("All trees need to be rooted!") } fun <- function(x){ el <- numeric(max(x$edge)) @@ -110,7 +110,7 @@ add_ci <- function(tree, trees, col95 = "#FF00004D", col50 = "#0000FF4D", lastPP <- get("last_plot.phylo", envir = ape::.PlotPhyloEnv) direction <- lastPP$direction trees <- .uncompressTipLabel(trees) - if(!is_rooted(tree) || !all(is_rooted(trees))) stop("All trees need to be rooted!") + if(!is.rooted(tree) || !all(is.rooted(trees))) stop("All trees need to be rooted!") X <- edge_length_matrix(tree, trees, rooted=TRUE)[, -(seq_along(Ntip(tree)))] CI <- apply(X, 2, FUN=\(x)quantile(na.omit(x), probs=c(.025,.25,.75,.975))) horizontal <- FALSE From 8895b927d0df8504e28fe9415a5c5db700f4df6f Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 9 May 2023 18:01:33 +0200 Subject: [PATCH 008/216] add MCC option to plotBS. Small improvements --- R/bootstrap.R | 9 +++++---- R/maxCladeCred.R | 2 +- man/maxCladeCred.Rd | 2 +- man/plotBS.Rd | 5 +++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/R/bootstrap.R b/R/bootstrap.R index d0dace12..7f53fe22 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -83,7 +83,7 @@ bootstrap.pml <- function(x, bs = 100, trees = TRUE, multicore = FALSE, if(.Platform$OS.type=="windows") multicore <- FALSE if (multicore && is.null(mc.cores)) mc.cores <- min(detectCores()-1L, 4L) if(multicore && mc.cores < 2L) multicore <- FALSE - if(is_rooted(x$tree)){ + if(is.rooted(x$tree)){ if(is.ultrametric(x$tree)) method <- "ultrametric" else method <- "tipdated" } @@ -231,8 +231,9 @@ checkLabels <- function(tree, tip) { #' @param type the type of tree to plot, one of "phylogram", "cladogram", "fan", #' "unrooted", "radial" or "none". If type is "none" the tree is returned with #' the bootstrap values assigned to the node labels. -#' @param method either "FBP" the classical bootstrap (default) or "TBE" -#' (transfer bootstrap) +#' @param method either "FBP" the classical bootstrap (default), "TBE" +#' (transfer bootstrap) or "MCC" for assigning clade credibilities. In case of +#' "MCC" all trees need to be rooted. #' @param bs.col color of bootstrap support labels. #' @param bs.adj one or two numeric values specifying the horizontal and #' vertical justification of the bootstrap labels. @@ -282,7 +283,7 @@ plotBS <- function(tree, BStrees, type = "phylogram", if (hasArg(BStrees)) { if(method=="FBP"){ BStrees <- .uncompressTipLabel(BStrees) # check if needed - if (any(is_rooted(BStrees))) BStrees <- unroot(BStrees) + if (any(is.rooted(BStrees))) BStrees <- unroot(BStrees) x <- prop.clades(tree, BStrees) x <- (x / length(BStrees)) * 100 tree$node.label <- x diff --git a/R/maxCladeCred.R b/R/maxCladeCred.R index d792b3b9..0b08c7fa 100644 --- a/R/maxCladeCred.R +++ b/R/maxCladeCred.R @@ -23,7 +23,7 @@ #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{consensus}}, \code{\link{consensusNet}}, #' \code{\link{prop.part}}, \code{\link{bootstrap.pml}}, \code{\link{plotBS}}, -#' \code{\link{transferBootstrap}} +#' \code{\link{transferBootstrap}}, \code{\link{add_edge_length}} #' @keywords cluster #' @importFrom fastmatch fmatch #' @examples diff --git a/man/maxCladeCred.Rd b/man/maxCladeCred.Rd index 58c08e53..62582c94 100644 --- a/man/maxCladeCred.Rd +++ b/man/maxCladeCred.Rd @@ -72,7 +72,7 @@ maxCladeCred(c(tree, bs[[1]]), tree=FALSE, part = pp) \seealso{ \code{\link{consensus}}, \code{\link{consensusNet}}, \code{\link{prop.part}}, \code{\link{bootstrap.pml}}, \code{\link{plotBS}}, -\code{\link{transferBootstrap}} +\code{\link{transferBootstrap}}, \code{\link{add_edge_length}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/plotBS.Rd b/man/plotBS.Rd index 13226e0b..e862831c 100644 --- a/man/plotBS.Rd +++ b/man/plotBS.Rd @@ -17,8 +17,9 @@ plotBS(tree, BStrees, type = "phylogram", method = "FBP", "unrooted", "radial" or "none". If type is "none" the tree is returned with the bootstrap values assigned to the node labels.} -\item{method}{either "FBP" the classical bootstrap (default) or "TBE" -(transfer bootstrap)} +\item{method}{either "FBP" the classical bootstrap (default), "TBE" +(transfer bootstrap) or "MCC" for assigning clade credibilities. In case of +"MCC" all trees need to be rooted.} \item{bs.col}{color of bootstrap support labels.} From ecf476ffa883072c969ec6e0dd535501bc6e56f3 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 9 May 2023 18:03:32 +0200 Subject: [PATCH 009/216] Small improvements --- vignettes/Trees.Rmd | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/vignettes/Trees.Rmd b/vignettes/Trees.Rmd index 7e69eb81..93df697a 100644 --- a/vignettes/Trees.Rmd +++ b/vignettes/Trees.Rmd @@ -226,8 +226,7 @@ write.tree(tree_tfbs, "primates.tree") ## Molecular dating with a strict clock for ultrametric and tipdated phylogenies -When we assume a "molecular clock" phylogenies can be used to infer divergence times [@Zuckerkandl1965]. We implemented a strict clock as described in [@Felsenstein2004], p. 266, allowing to infer ultrametric and tip-dated phylogenies. We need a starting tree that fulfills the assumptions, so either the tree has to be ultrametric, or the constraints given by the tip dates. -For an ultrametric starting tree we can use an UPGMA or WPGMA tree. +When we assume a "molecular clock" phylogenies can be used to infer divergence times [@Zuckerkandl1965]. We implemented a strict clock as described in [@Felsenstein2004], p. 266, allowing to infer ultrametric and tip-dated phylogenies. The function `pml_bb` ensures that the tree is ultrametric, or the constraints given by the tip dates are fulfilled. That differs from the function `optim.pml` where th tree supplied to the function has to fulfill the constraints. In this case for an ultrametric starting tree we can use an UPGMA or WPGMA tree. ```{r strict_primates, echo=TRUE, cache=TRUE} fit_strict <- pml_bb(primates, model="HKY+G(4)", method="ultrametric", rearrangement="NNI", control = pml.control(trace = 0)) @@ -249,16 +248,20 @@ head(dates) ``` Again we use the `pml_bb` function, which optimizes the tree given the constraints of the `tip.dates` vector. ```{r tipdated_fit} -fit_td <- pml_bb(H3N2, model="GTR+G(4)", method="tipdated", tip.dates=dates, +fit_td <- pml_bb(H3N2, model="HKY+I", method="tipdated", tip.dates=dates, rearrangement="NNI", control = pml.control(trace = 0)) +fit_td ``` +While the loglikelihood is lower than for an unrooted tree, we have to keep in mind that rooted trees use less parameters. +In unrooted trees we estimate one edge length parameter for each tree, for ultrametric trees we only estimate a parameter for each internal node and for tipdated trees we have one additional parameter for the rate. The rate is here comparable to the slope fo the tip-to-root regression in programs like *TempEst* [@TempEst]. + + + And at last we plot the tree with a timescale. ```{r tipdated_plot} -plot(fit_td, show.tip.label = FALSE) +plot(fit_td, align.tip.label=TRUE) ``` -While the loglikelihood is lower than for an unrooted tree, we have to keep in mind that rooted trees use less parameters. -In unrooted trees we estimate one edge length parameter for each tree, for ultrametric trees we only estimate a parameter for each internal node and for tipdated trees we have one additional parameter for the rate. \newpage From cdbf6db9a36d2e472b6fdeaae1f602dc34a36500 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 9 May 2023 18:04:12 +0200 Subject: [PATCH 010/216] print rate for tipdated tree --- R/pml_generics.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/pml_generics.R b/R/pml_generics.R index a21d472c..1b38e94c 100644 --- a/R/pml_generics.R +++ b/R/pml_generics.R @@ -91,6 +91,7 @@ print.pml <- function(x, ...) { row.names(rw) <- as.character(seq_len(nrow(rw))) print(rw) } + if(!is.null(x$method) && x$method == "tipdated") cat("\nRate:", x$rate, "\n") if (type == "AA") cat("Rate matrix:", x$model, "\n") if (type == "DNA") { cat("\nRate matrix:\n") From 28928d76e4af41e5df9fa8b22948a15e9a975272 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 9 May 2023 18:04:55 +0200 Subject: [PATCH 011/216] uncomment uneeded functions --- R/bootstrap.R | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/R/bootstrap.R b/R/bootstrap.R index 7f53fe22..aac249ab 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -336,26 +336,24 @@ plotBS <- function(tree, BStrees, type = "phylogram", -is_rooted <- function(phy) UseMethod("is_rooted") +#is_rooted <- function(phy) UseMethod("is_rooted") -.is_rooted_ape <- function(phy, ntips) -{ - if (!is.null(phy$root.edge)) return(TRUE) - if (tabulate(phy$edge[, 1])[ntips + 1] > 2) FALSE else TRUE -} - -is_rooted.phylo <- function (phy) - .is_rooted_ape(phy, length(phy$tip.label)) - -is_rooted.multiPhylo <- function(phy) -{ - labs <- attr(phy, "TipLabel") - class(phy) <- NULL - if (is.null(labs)) sapply(phy, is_rooted.phylo) - else sapply(phy, .is_rooted_ape, ntips = length(labs)) -} +#.is_rooted_ape <- function(phy, ntips) +#{ +# if (!is.null(phy$root.edge)) return(TRUE) +# if (tabulate(phy$edge[, 1])[ntips + 1] > 2) FALSE else TRUE +#} +#is_rooted.phylo <- function (phy) +# .is_rooted_ape(phy, length(phy$tip.label)) +#is_rooted.multiPhylo <- function(phy) +#{ +# labs <- attr(phy, "TipLabel") +# class(phy) <- NULL +# if (is.null(labs)) sapply(phy, is_rooted.phylo) +# else sapply(phy, .is_rooted_ape, ntips = length(labs)) +#} cladeMatrix <- function(x, rooted = FALSE) { From 137361f93531e9656fcdd4756b406090a4e78e4c Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 9 May 2023 18:05:32 +0200 Subject: [PATCH 012/216] updates --- NEWS | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 19bb624f..8f1094f1 100644 --- a/NEWS +++ b/NEWS @@ -12,11 +12,15 @@ OTHER CHANGES which were implicitly using "estimated". - o some improvements to pmlPart + o pml_bb roots the tree, if method is either ultrametric or tipdated. Fixes - o nicer defaults for plot.pml + problem when modelTest object was supplied. - o the sankoff algorithm has been rewritten + o some improvements to pmlPart. + + o nicer defaults for plot.pml, mainly for rooted trees. + + o the sankoff algorithm has been rewritten. CHANGES in PHANGORN VERSION 2.11.0 From 990d3ec17881c52abaef431cf8f40c0da6a384c2 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 9 May 2023 19:32:02 +0200 Subject: [PATCH 013/216] better defaults, check if trees are rooted --- R/bootstrap.R | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/R/bootstrap.R b/R/bootstrap.R index aac249ab..52de4623 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -86,9 +86,12 @@ bootstrap.pml <- function(x, bs = 100, trees = TRUE, multicore = FALSE, if(is.rooted(x$tree)){ if(is.ultrametric(x$tree)) method <- "ultrametric" else method <- "tipdated" + optRooted <- TRUE + } + else { + method <- "unrooted" + optRooted <- FALSE } - else method <- "unrooted" - extras <- match.call(expand.dots = FALSE)$... rearr <- c("optNni", "rearrangement") tmp <- pmatch(names(extras), rearr) @@ -106,7 +109,9 @@ bootstrap.pml <- function(x, bs = 100, trees = TRUE, multicore = FALSE, tmp <- pmatch("optRooted", names(extras)) if(!is.na(tmp)){ is_ultrametric <- extras$optRooted + optRooted <- extras$optRooted } + else if(optRooted) extras <- append(extras, list(optRooted=TRUE)) data <- x$data weight <- attr(data, "weight") v <- rep(seq_along(weight), weight) @@ -128,7 +133,8 @@ bootstrap.pml <- function(x, bs = 100, trees = TRUE, multicore = FALSE, tip.dates = tip.dates) fit <- update(fit, tree = tree) } - fit <- optim.pml(fit, ...) +# fit <- optim.pml(fit, ...) + fit <- do.call(optim.pml, append(list(object=fit), extras)) if (trees) { tree <- fit$tree return(tree) From b60878b39c528b144e88751656e35e81ad4bde5e Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 9 May 2023 19:42:59 +0200 Subject: [PATCH 014/216] clean up --- R/bootstrap.R | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/R/bootstrap.R b/R/bootstrap.R index 52de4623..b5b9b71e 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -341,27 +341,6 @@ plotBS <- function(tree, BStrees, type = "phylogram", } - -#is_rooted <- function(phy) UseMethod("is_rooted") - -#.is_rooted_ape <- function(phy, ntips) -#{ -# if (!is.null(phy$root.edge)) return(TRUE) -# if (tabulate(phy$edge[, 1])[ntips + 1] > 2) FALSE else TRUE -#} - -#is_rooted.phylo <- function (phy) -# .is_rooted_ape(phy, length(phy$tip.label)) - -#is_rooted.multiPhylo <- function(phy) -#{ -# labs <- attr(phy, "TipLabel") -# class(phy) <- NULL -# if (is.null(labs)) sapply(phy, is_rooted.phylo) -# else sapply(phy, .is_rooted_ape, ntips = length(labs)) -#} - - cladeMatrix <- function(x, rooted = FALSE) { if (!rooted) x <- unroot(x) pp <- prop.part(x) From 5edb485665095f11b51d748a31a10abc8f84912f Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 9 May 2023 22:04:44 +0200 Subject: [PATCH 015/216] bugfix --- R/draw_CI.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/draw_CI.R b/R/draw_CI.R index 096a9811..8ca4f049 100644 --- a/R/draw_CI.R +++ b/R/draw_CI.R @@ -111,7 +111,7 @@ add_ci <- function(tree, trees, col95 = "#FF00004D", col50 = "#0000FF4D", direction <- lastPP$direction trees <- .uncompressTipLabel(trees) if(!is.rooted(tree) || !all(is.rooted(trees))) stop("All trees need to be rooted!") - X <- edge_length_matrix(tree, trees, rooted=TRUE)[, -(seq_along(Ntip(tree)))] + X <- edge_length_matrix(tree, trees, rooted=TRUE)[, -(seq_len(Ntip(tree)))] CI <- apply(X, 2, FUN=\(x)quantile(na.omit(x), probs=c(.025,.25,.75,.975))) horizontal <- FALSE if(direction=="rightwards" || direction=="leftwards"){ From b6c1f597f8b4702bde761e94a346b3b5339ec497 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 30 May 2023 15:49:20 +0200 Subject: [PATCH 016/216] allow to return vector --- R/transferBootstrap.R | 11 +++++++---- man/transferBootstrap.Rd | 7 ++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/R/transferBootstrap.R b/R/transferBootstrap.R index 2affd7f4..4259a116 100644 --- a/R/transferBootstrap.R +++ b/R/transferBootstrap.R @@ -1,7 +1,7 @@ ## include in addConfidences, plotBS etc. #' Transfer Bootstrap #' -#' \code{transferBootstrap} assignes transfer bootstrap (Lemoine et al. 2018) +#' \code{transferBootstrap} assigns transfer bootstrap (Lemoine et al. 2018) #' values to the (internal) edges. #' #' @param tree The tree on which edges the bootstrap values are plotted. @@ -11,7 +11,8 @@ #' \code{BSTrees} is optional and if not supplied the labels supplied #' in the \code{node.label} slot will be used. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{plotBS}}, \code{\link{maxCladeCred}} +#' @seealso \code{\link{plotBS}}, \code{\link{maxCladeCred}}, +#' \code{\link{drawSupportOnEdges}} #' @references Lemoine, F., Entfellner, J. B. D., Wilkinson, E., Correia, D., #' Felipe, M. D., De Oliveira, T., & Gascuel, O. (2018). Renewing Felsenstein’s #' phylogenetic bootstrap in the era of big data. \emph{Nature}, @@ -29,7 +30,7 @@ #' # same as #' plotBS(raxml.tree, raxml.bootstrap, "p", "TBE") #' @export -transferBootstrap <- function(tree, BStrees){ +transferBootstrap <- function(tree, BStrees, phylo=TRUE){ if(!inherits(BStrees, "multiPhylo")) stop("BSTrees needs to be of class multiPhylo!") BStrees <- .uncompressTipLabel(BStrees) @@ -51,7 +52,9 @@ transferBootstrap <- function(tree, BStrees){ for(j in ind) res[j] <- res[j] + Transfer_Index(bp[[j]], tmp$edge, l) } res <- res / length(BStrees) * 100 - tree$node.label <- c(NA_real_, res) + res <- c(NA_real_, res) + if(!phylo) return(res) + tree$node.label <- res tree } diff --git a/man/transferBootstrap.Rd b/man/transferBootstrap.Rd index 4a8ec1eb..bee1f277 100644 --- a/man/transferBootstrap.Rd +++ b/man/transferBootstrap.Rd @@ -4,7 +4,7 @@ \alias{transferBootstrap} \title{Transfer Bootstrap} \usage{ -transferBootstrap(tree, BStrees) +transferBootstrap(tree, BStrees, phylo = TRUE) } \arguments{ \item{tree}{The tree on which edges the bootstrap values are plotted.} @@ -18,7 +18,7 @@ transferBootstrap(tree, BStrees) in the \code{node.label} slot will be used. } \description{ -\code{transferBootstrap} assignes transfer bootstrap (Lemoine et al. 2018) +\code{transferBootstrap} assigns transfer bootstrap (Lemoine et al. 2018) values to the (internal) edges. } \examples{ @@ -41,7 +41,8 @@ phylogenetic bootstrap in the era of big data. \emph{Nature}, \bold{556(7702)}, 452--456. } \seealso{ -\code{\link{plotBS}}, \code{\link{maxCladeCred}} +\code{\link{plotBS}}, \code{\link{maxCladeCred}}, +\code{\link{drawSupportOnEdges}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} From f4cd79ccaaa6c8eeeea4c8b5a71e44fed05e3ae7 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 30 May 2023 16:46:50 +0200 Subject: [PATCH 017/216] fix man page --- R/transferBootstrap.R | 2 ++ man/transferBootstrap.Rd | 3 +++ 2 files changed, 5 insertions(+) diff --git a/R/transferBootstrap.R b/R/transferBootstrap.R index 4259a116..a2fc9181 100644 --- a/R/transferBootstrap.R +++ b/R/transferBootstrap.R @@ -6,6 +6,8 @@ #' #' @param tree The tree on which edges the bootstrap values are plotted. #' @param BStrees a list of trees (object of class "multiPhylo"). +#' @param phylo Logical, return a phylogentic tree with support value or a +#' vector of bootstrap values. #' @return \code{plotBS} returns silently a tree, i.e. an object of class #' \code{phylo} with the bootstrap values as node labels. The argument #' \code{BSTrees} is optional and if not supplied the labels supplied diff --git a/man/transferBootstrap.Rd b/man/transferBootstrap.Rd index bee1f277..c62a2af8 100644 --- a/man/transferBootstrap.Rd +++ b/man/transferBootstrap.Rd @@ -10,6 +10,9 @@ transferBootstrap(tree, BStrees, phylo = TRUE) \item{tree}{The tree on which edges the bootstrap values are plotted.} \item{BStrees}{a list of trees (object of class "multiPhylo").} + +\item{phylo}{Logical, return a phylogentic tree with support value or a +vector of bootstrap values.} } \value{ \code{plotBS} returns silently a tree, i.e. an object of class From 00a79705ead684b8d5c8c4f8161ff231c790d398 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 30 May 2023 16:48:22 +0200 Subject: [PATCH 018/216] allow clade credibilities --- R/bootstrap.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/bootstrap.R b/R/bootstrap.R index b5b9b71e..1e853c76 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -285,11 +285,14 @@ plotBS <- function(tree, BStrees, type = "phylogram", bs.adj = NULL, digits=3, p = 0, frame = "none", ...) { type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", "radial", "none")) - method <- match.arg(method, c("FBP", "TBE")) + if(inherits(tree, "pml")) tree <- tree$tree + method <- match.arg(method, c("FBP", "TBE", "MCC")) if (hasArg(BStrees)) { - if(method=="FBP"){ + if(method=="FBP" || method=="MCC"){ BStrees <- .uncompressTipLabel(BStrees) # check if needed - if (any(is.rooted(BStrees))) BStrees <- unroot(BStrees) + if(method=="MCC" && any(!is.rooted(BStrees))) + stop("All trees need to be rooted for method 'MCC'!") + if (method=="FBP" && any(is.rooted(BStrees))) BStrees <- unroot(BStrees) x <- prop.clades(tree, BStrees) x <- (x / length(BStrees)) * 100 tree$node.label <- x From b06c0cb420d0bf4a2d93d94bdeef1f5044b4cd89 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 2 Jun 2023 12:03:11 +0200 Subject: [PATCH 019/216] fix spelling --- R/plot_pml.R | 2 +- man/plot.pml.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/plot_pml.R b/R/plot_pml.R index a9ae6a45..86fe607d 100644 --- a/R/plot_pml.R +++ b/R/plot_pml.R @@ -1,6 +1,6 @@ #' Plot phylogeny of a pml object #' -#' \code{plot.pml} is a warapper around \code{plot.phylo} with different default +#' \code{plot.pml} is a wrapper around \code{plot.phylo} with different default #' values for unrooted, ultrametric and tip dated phylogenies. #' #' @param x an object of class \code{pml} or \code{phyDat}. diff --git a/man/plot.pml.Rd b/man/plot.pml.Rd index f066c45f..77971584 100644 --- a/man/plot.pml.Rd +++ b/man/plot.pml.Rd @@ -20,7 +20,7 @@ and "downwards".} \item{\dots}{further parameters to be passed to \code{plot.phylo}.} } \description{ -\code{plot.pml} is a warapper around \code{plot.phylo} with different default +\code{plot.pml} is a wrapper around \code{plot.phylo} with different default values for unrooted, ultrametric and tip dated phylogenies. } \examples{ From 9e2e1db0a139056a66b35e896f957d3bc5d9b92e Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 7 Jun 2023 11:46:46 +0200 Subject: [PATCH 020/216] small --- R/draw_CI.R | 2 +- man/add_edge_length.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/draw_CI.R b/R/draw_CI.R index 8ca4f049..934392f3 100644 --- a/R/draw_CI.R +++ b/R/draw_CI.R @@ -59,7 +59,7 @@ edge_length_matrix <- function(tree, trees, rooted=TRUE){ ##' @keywords aplot ##' @export add_edge_length <- function(tree, trees, fun=\(x)median(na.omit(x)), - rooted=TRUE){ + rooted=all(is.rooted(c(tree, trees)))){ if(!rooted) tree <- unroot(tree) X <- edge_length_matrix(tree, trees, rooted) nh <- apply(X, 2, fun) diff --git a/man/add_edge_length.Rd b/man/add_edge_length.Rd index c010e8cd..e79911e2 100644 --- a/man/add_edge_length.Rd +++ b/man/add_edge_length.Rd @@ -5,7 +5,7 @@ \title{Assign and compute edge lengths from a sample of trees} \usage{ add_edge_length(tree, trees, fun = function(x) median(na.omit(x)), - rooted = TRUE) + rooted = all(is.rooted(c(tree, trees)))) } \arguments{ \item{tree}{tree where edge lengths are assigned to.} From 84df1d26e650f17f833fcd9706f0fb74c5038b81 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 16 Jun 2023 09:37:45 +0200 Subject: [PATCH 021/216] fix spelling --- vignettes/IntertwiningTreesAndNetworks.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/IntertwiningTreesAndNetworks.Rmd b/vignettes/IntertwiningTreesAndNetworks.Rmd index 797c55d8..3551722e 100644 --- a/vignettes/IntertwiningTreesAndNetworks.Rmd +++ b/vignettes/IntertwiningTreesAndNetworks.Rmd @@ -30,7 +30,7 @@ knitr::knit_hooks$set(small.mar=function(before, options, envir){ -*Description:* This script provides examples of the new functions available in the phangorn library to 'intertwine' trees and networks, i.e. compare trees and networks and data transferrance. It also provides a step-by-step guide for users new to R. +*Description:* This script provides examples of the new functions available in the phangorn library to 'intertwine' trees and networks, i.e. compare trees and networks and data transference. It also provides a step-by-step guide for users new to R. *Methodological advancement:* The major advancement in this phangorn update is the introduction of a generic network object with a wide range of related transfer and analysis functions. These new functions provide the first means to directly transfer information amongst a wide range of phylogenetic trees and networks, as well as means to visualize and further analyze this information. This should provide a platform for individuals to easily conduct tree-network comparisons and stimulate further function development by the community. From 83c3be8656eab378abc9cede8b209e341c638534 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 16 Jun 2023 09:38:17 +0200 Subject: [PATCH 022/216] simplify --- src/ml.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ml.c b/src/ml.c index d765530e..e7678e04 100644 --- a/src/ml.c +++ b/src/ml.c @@ -248,7 +248,7 @@ void lll3(SEXP dlist, double *eva, double *eve, double *evei, double *el, double SEXP PML0(SEXP dlist, SEXP EL, SEXP G, SEXP NR, SEXP NC, SEXP K, SEXP eig, SEXP bf, SEXP node, SEXP edge, SEXP NTips, SEXP nco, SEXP contrast, SEXP N){ int nr=INTEGER(NR)[0], nc=INTEGER(NC)[0], k=INTEGER(K)[0], i, indLL; int nTips = INTEGER(NTips)[0], *SC; - double *g=REAL(G), *tmp, logScaleEPS; + double *g=REAL(G), *tmp; //, logScaleEPS; SEXP TMP; double *eva, *eve, *evei; eva = REAL(VECTOR_ELT(eig, 0)); @@ -263,8 +263,8 @@ SEXP PML0(SEXP dlist, SEXP EL, SEXP G, SEXP NR, SEXP NC, SEXP K, SEXP eig, SEXP for(i=0; i Date: Fri, 16 Jun 2023 09:42:01 +0200 Subject: [PATCH 023/216] allow character vectors --- R/treeManipulation.R | 19 +++++++++++++++++-- man/Ancestors.Rd | 3 ++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/R/treeManipulation.R b/R/treeManipulation.R index 8a288b5d..c54b35ac 100644 --- a/R/treeManipulation.R +++ b/R/treeManipulation.R @@ -632,7 +632,16 @@ allAncestors <- function(x) { res } - +char2pos <- function(x, node){ + if(is.null(x$node.label)){ + tmp <- as.character(seq(Ntip(x)+1, Ntip(x)+Nnode(x))) + labels <- c(x$tip.label, tmp) + } + else labels <- c(x$tip.label, x$node.label) + x <- match(node, labels) + if(any(is.na(x))) stop("Can't find supplied node in the labels") + x +} ## @aliases Ancestors Children Descendants Siblings mrca.phylo #' tree utility function @@ -649,7 +658,8 @@ allAncestors <- function(x) { #' If the argument node is missing the function is evaluated for all nodes. #' #' @param x a tree (a phylo object). -#' @param node an integer or a vector of integers corresponding to a node ID +#' @param node an integer or character vector (or skalar) corresponding to a +#' node ID #' @param type specify whether to return just direct children / parents or all #' @param include.self whether to include self in list of siblings #' @param full a logical indicating whether to return the MRCAs among all tips @@ -678,6 +688,7 @@ allAncestors <- function(x) { #' @export #' @rdname Ancestors Ancestors <- function(x, node, type = c("all", "parent")) { + if(inherits(node, "character")) x <- char2pos(x, node) parents <- x$edge[, 1] child <- x$edge[, 2] pvector <- integer(max(x$edge)) # parents @@ -730,6 +741,7 @@ allDescendants <- function(x) { #' @export Children <- function(x, node) { # return allChildren if node is missing + if(inherits(node, "character")) x <- char2pos(x, node) if (!missing(node) && length(node) == 1) return(x$edge[x$edge[, 1] == node, 2]) allChildren(x)[node] @@ -740,6 +752,7 @@ Children <- function(x, node) { #' @export Descendants <- function(x, node, type = c("tips", "children", "all")) { type <- match.arg(type) + if(inherits(node, "character")) x <- char2pos(x, node) if (type == "children") return(Children(x, node)) if (type == "tips") return(bip(x)[node]) # new version using Rcpp @@ -766,6 +779,7 @@ Descendants <- function(x, node, type = c("tips", "children", "all")) { #' @export Siblings <- function(x, node, include.self = FALSE) { if (missing(node)) node <- as.integer(1:max(x$edge)) + if(inherits(node, "character")) x <- char2pos(x, node) l <- length(node) if (l == 1) { v <- Children(x, Ancestors(x, node, "parent")) @@ -799,6 +813,7 @@ Siblings <- function(x, node, include.self = FALSE) { #' @export mrca.phylo <- function(x, node = NULL, full = FALSE) { if (is.null(node)) return(mrca2(x, full = full)) + if(inherits(node, "character")) x <- char2pos(x, node) return(getMRCA(x, node)) } diff --git a/man/Ancestors.Rd b/man/Ancestors.Rd index 1e61d48e..eb5d8ba7 100644 --- a/man/Ancestors.Rd +++ b/man/Ancestors.Rd @@ -24,7 +24,8 @@ mrca.phylo(x, node = NULL, full = FALSE) \arguments{ \item{x}{a tree (a phylo object).} -\item{node}{an integer or a vector of integers corresponding to a node ID} +\item{node}{an integer or character vector (or skalar) corresponding to a +node ID} \item{type}{specify whether to return just direct children / parents or all} From 48a0245128e4dbcdd58ac2917895a808314a8196 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 16 Jun 2023 09:42:31 +0200 Subject: [PATCH 024/216] Improve manual --- R/pml_control.R | 4 ++-- man/pml.control.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/pml_control.R b/R/pml_control.R index c1fada40..2174236f 100644 --- a/R/pml_control.R +++ b/R/pml_control.R @@ -1,7 +1,7 @@ #' Auxiliary for Controlling Fitting #' -#' Auxiliary functions for \code{\link{optim.pml}} fitting. Use it to construct -#' a \code{control} or \code{ratchet.par} argument. +#' Auxiliary functions for providing \code{\link{optim.pml}, \link{pml_bb}} +#' fitting. Use it to construct a \code{control} or \code{ratchet.par} argument. #' #' \code{pml.control} controls the fitting process. \code{epsilon} and #' \code{maxit} are only defined for the most outer loop, this affects diff --git a/man/pml.control.Rd b/man/pml.control.Rd index 5116ac22..5b3d77ca 100644 --- a/man/pml.control.Rd +++ b/man/pml.control.Rd @@ -39,8 +39,8 @@ A list with components named as the arguments for controlling the fitting process. } \description{ -Auxiliary functions for \code{\link{optim.pml}} fitting. Use it to construct -a \code{control} or \code{ratchet.par} argument. +Auxiliary functions for providing \code{\link{optim.pml}, \link{pml_bb}} +fitting. Use it to construct a \code{control} or \code{ratchet.par} argument. } \details{ \code{pml.control} controls the fitting process. \code{epsilon} and From c151435076e8f39bb6977d911c383ad8fa705b2f Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 16 Jun 2023 10:24:47 +0200 Subject: [PATCH 025/216] bug fix --- R/treeManipulation.R | 12 ++++++------ man/Ancestors.Rd | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/treeManipulation.R b/R/treeManipulation.R index c54b35ac..36a1061d 100644 --- a/R/treeManipulation.R +++ b/R/treeManipulation.R @@ -658,7 +658,7 @@ char2pos <- function(x, node){ #' If the argument node is missing the function is evaluated for all nodes. #' #' @param x a tree (a phylo object). -#' @param node an integer or character vector (or skalar) corresponding to a +#' @param node an integer or character vector (or scalar) corresponding to a #' node ID #' @param type specify whether to return just direct children / parents or all #' @param include.self whether to include self in list of siblings @@ -688,7 +688,7 @@ char2pos <- function(x, node){ #' @export #' @rdname Ancestors Ancestors <- function(x, node, type = c("all", "parent")) { - if(inherits(node, "character")) x <- char2pos(x, node) + if(inherits(node, "character")) node <- char2pos(x, node) parents <- x$edge[, 1] child <- x$edge[, 2] pvector <- integer(max(x$edge)) # parents @@ -741,7 +741,7 @@ allDescendants <- function(x) { #' @export Children <- function(x, node) { # return allChildren if node is missing - if(inherits(node, "character")) x <- char2pos(x, node) + if(inherits(node, "character")) node <- char2pos(x, node) if (!missing(node) && length(node) == 1) return(x$edge[x$edge[, 1] == node, 2]) allChildren(x)[node] @@ -752,7 +752,7 @@ Children <- function(x, node) { #' @export Descendants <- function(x, node, type = c("tips", "children", "all")) { type <- match.arg(type) - if(inherits(node, "character")) x <- char2pos(x, node) + if(inherits(node, "character")) node <- char2pos(x, node) if (type == "children") return(Children(x, node)) if (type == "tips") return(bip(x)[node]) # new version using Rcpp @@ -779,7 +779,7 @@ Descendants <- function(x, node, type = c("tips", "children", "all")) { #' @export Siblings <- function(x, node, include.self = FALSE) { if (missing(node)) node <- as.integer(1:max(x$edge)) - if(inherits(node, "character")) x <- char2pos(x, node) + if(inherits(node, "character")) node <- char2pos(x, node) l <- length(node) if (l == 1) { v <- Children(x, Ancestors(x, node, "parent")) @@ -813,7 +813,7 @@ Siblings <- function(x, node, include.self = FALSE) { #' @export mrca.phylo <- function(x, node = NULL, full = FALSE) { if (is.null(node)) return(mrca2(x, full = full)) - if(inherits(node, "character")) x <- char2pos(x, node) + if(inherits(node, "character")) node <- char2pos(x, node) return(getMRCA(x, node)) } diff --git a/man/Ancestors.Rd b/man/Ancestors.Rd index eb5d8ba7..8f99ea0c 100644 --- a/man/Ancestors.Rd +++ b/man/Ancestors.Rd @@ -24,7 +24,7 @@ mrca.phylo(x, node = NULL, full = FALSE) \arguments{ \item{x}{a tree (a phylo object).} -\item{node}{an integer or character vector (or skalar) corresponding to a +\item{node}{an integer or character vector (or scalar) corresponding to a node ID} \item{type}{specify whether to return just direct children / parents or all} From 5b0d695627e420d2179be80b93ed250df3d45063 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 16 Jun 2023 14:49:18 +0200 Subject: [PATCH 026/216] bugfix --- R/treeManipulation.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/treeManipulation.R b/R/treeManipulation.R index 36a1061d..b0207a82 100644 --- a/R/treeManipulation.R +++ b/R/treeManipulation.R @@ -688,7 +688,7 @@ char2pos <- function(x, node){ #' @export #' @rdname Ancestors Ancestors <- function(x, node, type = c("all", "parent")) { - if(inherits(node, "character")) node <- char2pos(x, node) + if(!missing(node) && inherits(node, "character")) node <- char2pos(x, node) parents <- x$edge[, 1] child <- x$edge[, 2] pvector <- integer(max(x$edge)) # parents @@ -741,7 +741,7 @@ allDescendants <- function(x) { #' @export Children <- function(x, node) { # return allChildren if node is missing - if(inherits(node, "character")) node <- char2pos(x, node) + if(!missing(node) && inherits(node, "character")) node <- char2pos(x, node) if (!missing(node) && length(node) == 1) return(x$edge[x$edge[, 1] == node, 2]) allChildren(x)[node] @@ -752,7 +752,7 @@ Children <- function(x, node) { #' @export Descendants <- function(x, node, type = c("tips", "children", "all")) { type <- match.arg(type) - if(inherits(node, "character")) node <- char2pos(x, node) + if(!missing(node) && inherits(node, "character")) node <- char2pos(x, node) if (type == "children") return(Children(x, node)) if (type == "tips") return(bip(x)[node]) # new version using Rcpp @@ -779,7 +779,7 @@ Descendants <- function(x, node, type = c("tips", "children", "all")) { #' @export Siblings <- function(x, node, include.self = FALSE) { if (missing(node)) node <- as.integer(1:max(x$edge)) - if(inherits(node, "character")) node <- char2pos(x, node) + if(!missing(node) && inherits(node, "character")) node <- char2pos(x, node) l <- length(node) if (l == 1) { v <- Children(x, Ancestors(x, node, "parent")) @@ -813,7 +813,7 @@ Siblings <- function(x, node, include.self = FALSE) { #' @export mrca.phylo <- function(x, node = NULL, full = FALSE) { if (is.null(node)) return(mrca2(x, full = full)) - if(inherits(node, "character")) node <- char2pos(x, node) + if(!missing(node) && inherits(node, "character")) node <- char2pos(x, node) return(getMRCA(x, node)) } From 3efcf6daf838e27ec64f2464ac7a892780950cdd Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 20 Jun 2023 09:05:27 +0200 Subject: [PATCH 027/216] clean up --- R/modelTest.R | 1 - vignettes/Morphological.Rmd | 16 +++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/R/modelTest.R b/R/modelTest.R index f94251cb..603eb252 100644 --- a/R/modelTest.R +++ b/R/modelTest.R @@ -209,7 +209,6 @@ modelTest <- function(object, tree = NULL, model = NULL, G = TRUE, I = TRUE, m <- m + 1 } if (FREQ & G & I) { -# if (trace > 0) print(paste0(model, "+G+I+F")) fitGIF <- update(fitIF, k = k) fitGIF <- optim.pml(fitGIF, model = model, optBf = TRUE, optInv = TRUE, optGamma = TRUE, control = control) diff --git a/vignettes/Morphological.Rmd b/vignettes/Morphological.Rmd index 76124e73..76c99cc3 100644 --- a/vignettes/Morphological.Rmd +++ b/vignettes/Morphological.Rmd @@ -40,15 +40,15 @@ mm <- read.csv(file.path(fdir, "mites.csv"), row.names = 1) mm_pd <- phyDat(as.matrix(mm), type = "USER", levels = 0:7) ``` The data can then be written into a _nexus_ file: -```{r write nexus, eval=FALSE} +```{r write_nexus, eval=FALSE} write.phyDat(mm_pd, file.path(fdir, "mites.nex"), format = "nexus") ``` Reading in a _nexus_ file is even easier than reading in a _csv_ file: -```{r, read nexus} +```{r, read_nexus} mm_pd <- read.phyDat(file.path(fdir, "mites.nex"), format = "nexus", type = "STANDARD") ``` After reading in the _nexus_ file, we have the states 0:9, but the data only has the states 0:7. Here is one possibility to change the contrast matrix: -``` {r contrast matrix} +``` {r contrast_matrix} contrast <- matrix(data = c(1,0,0,0,0,0,0,0,0, 0,1,0,0,0,0,0,0,0, 0,0,1,0,0,0,0,0,0, @@ -104,17 +104,19 @@ mm_tree_rooted <- root(mm_tree, outgroup = "C._cymba", resolve.root = TRUE, ## Plot trees -With `plotBS`, we can either plot all of the trees with their respective edge support, or we can subset to only get a certain tree. It is also possible to save the plots as _.pdf_ (or various other formats, e.g. svg, png, tiff) file. `digits` is an argument to determine the number of digits shown for the bootstrap values. -```{r plot_trees, eval=FALSE} + +With `plotBS` we plot a tree with their respective edge support. It is also possible to save the plots as _.pdf_ (or various other formats, e.g. svg, png, tiff) file. `digits` is an argument to determine the number of digits shown for the bootstrap values. +```{r plot_trees, eval=FALSE} # subsetting for tree nr. 9 plotBS(mm_tree_rooted[[9]], digits = 2) # save plot as pdf pdf(file = "mm_rooted.pdf") -plotBS(mm_tree_rooted, digits = 2) +plotBS(mm_tree_rooted[[9]], digits = 2) dev.off() ``` From 7b938630dacb5ceafd48db9b163b56b019aebde3 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 20 Jun 2023 09:07:30 +0200 Subject: [PATCH 028/216] update --- NEWS | 4 ++++ R/bootstrap.R | 1 + 2 files changed, 5 insertions(+) diff --git a/NEWS b/NEWS index 8f1094f1..16f04e3f 100644 --- a/NEWS +++ b/NEWS @@ -22,6 +22,10 @@ OTHER CHANGES o the sankoff algorithm has been rewritten. + o functions Descendants, Ancestors, Siblings, mrca.phy now also accept a + + character vector for the node argument + CHANGES in PHANGORN VERSION 2.11.0 diff --git a/R/bootstrap.R b/R/bootstrap.R index 1e853c76..04696537 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -286,6 +286,7 @@ plotBS <- function(tree, BStrees, type = "phylogram", type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", "radial", "none")) if(inherits(tree, "pml")) tree <- tree$tree + if(!inherits(tree, "phylo")) stop("tree must be of class phylo!") method <- match.arg(method, c("FBP", "TBE", "MCC")) if (hasArg(BStrees)) { if(method=="FBP" || method=="MCC"){ From 367961bba807104a416c80dc53c4f3b2f7073ada Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 22 Jun 2023 14:49:56 +0200 Subject: [PATCH 029/216] improve rbind.phyDat --- R/phyDat.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/phyDat.R b/R/phyDat.R index 567cbb9e..22b12325 100644 --- a/R/phyDat.R +++ b/R/phyDat.R @@ -169,11 +169,11 @@ rbind.phyDat <- function(...){ l <- sapply(x, function(x)sum(attr(x, "weight"))) if(any(l!=l[1]))stop("Alignments have different # of characters!") if(any(types!=types[1]))stop("Alignments must have same type!") - nam <- sapply(x, names) + nam <- lapply(x, names) |> unlist() if(any(duplicated(nam)))stop("Duplicated names!") m <- lengths(x) mcs <- c(0, cumsum(m)) - res <- matrix(NA_character_, sum(m), l[1]) + res <- matrix(NA_character_, sum(m), l[1], dimnames=list(nam, NULL)) for(i in seq_along(x)){ res[(mcs[i]+1):mcs[i+1], ] <- as.character(x[[i]]) } From 6174f0d9682168698539622345611fb080699b8a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 4 Jul 2023 18:13:37 +0200 Subject: [PATCH 030/216] reorganize code --- R/bootstrap.R | 125 --------------------------------------- R/plotBS.R | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++ man/plotBS.Rd | 22 ++++--- 3 files changed, 173 insertions(+), 132 deletions(-) create mode 100644 R/plotBS.R diff --git a/R/bootstrap.R b/R/bootstrap.R index 04696537..b0d6ac67 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -220,131 +220,6 @@ checkLabels <- function(tree, tip) { } -#' Plotting trees with bootstrap values -#' -#' \code{plotBS} plots a phylogenetic tree with the bootstrap values assigned -#' to the (internal) edges. It can also used to assign bootstrap values to a -#' phylogenetic tree. -#' -#' \code{plotBS} can either assign the classical Felsenstein’s bootstrap -#' proportions (FBP) (Felsenstein (1985), Hendy & Penny (1985)) or the -#' transfer bootstrap expectation (TBE) of Lemoine et al. (2018). Using the -#' option \code{type=="n"} just assigns the bootstrap values and return the tree -#' without plotting it. -#' -#' @param tree The tree on which edges the bootstrap values are plotted. -#' @param BStrees a list of trees (object of class "multiPhylo"). -#' @param type the type of tree to plot, one of "phylogram", "cladogram", "fan", -#' "unrooted", "radial" or "none". If type is "none" the tree is returned with -#' the bootstrap values assigned to the node labels. -#' @param method either "FBP" the classical bootstrap (default), "TBE" -#' (transfer bootstrap) or "MCC" for assigning clade credibilities. In case of -#' "MCC" all trees need to be rooted. -#' @param bs.col color of bootstrap support labels. -#' @param bs.adj one or two numeric values specifying the horizontal and -#' vertical justification of the bootstrap labels. -#' @param digits integer indicating the number of decimal places. -#' @param p only plot support values higher than this percentage number -#' (default is 0). -#' @param \dots further parameters used by \code{plot.phylo}. -#' @param frame a character string specifying the kind of frame to be printed -#' around the bootstrap values. This must be one of "none" (the default), -#' "rect" or "circle". -#' @return \code{plotBS} returns silently a tree, i.e. an object of class -#' \code{phylo} with the bootstrap values as node labels. The argument -#' \code{BStrees} is optional and if not supplied the labels supplied -#' in the \code{node.label} slot will be used. -#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{transferBootstrap}}, \code{\link{plot.phylo}}, -#' \code{\link{maxCladeCred}}, \code{\link{nodelabels}}, -#' \code{\link{consensus}}, \code{\link{consensusNet}} -#' @references Felsenstein J. (1985) Confidence limits on phylogenies. An -#' approach using the bootstrap. \emph{Evolution} \bold{39}, 783--791 -#' -#' Lemoine, F., Entfellner, J. B. D., Wilkinson, E., Correia, D., Felipe, M. D., -#' De Oliveira, T., & Gascuel, O. (2018). Renewing Felsenstein’s phylogenetic -#' bootstrap in the era of big data. \emph{Nature}, \bold{556(7702)}, 452--456. -#' -#' Penny D. and Hendy M.D. (1985) Testing methods evolutionary tree -#' construction. \emph{Cladistics} \bold{1}, 266--278 -#' -#' Penny D. and Hendy M.D. (1986) Estimating the reliability of evolutionary -#' trees. \emph{Molecular Biology and Evolution} \bold{3}, 403--417 -#' @examples -#' fdir <- system.file("extdata/trees", package = "phangorn") -#' # RAxML best-known tree with bipartition support (from previous analysis) -#' raxml.tree <- read.tree(file.path(fdir,"RAxML_bipartitions.woodmouse")) -#' # RAxML bootstrap trees (from previous analysis) -#' raxml.bootstrap <- read.tree(file.path(fdir,"RAxML_bootstrap.woodmouse")) -#' par(mfrow=c(1,2)) -#' plotBS(raxml.tree, raxml.bootstrap, "p") -#' plotBS(raxml.tree, raxml.bootstrap, "p", "TBE") -#' @export -plotBS <- function(tree, BStrees, type = "phylogram", - method="FBP", bs.col = "black", - bs.adj = NULL, digits=3, p = 0, frame = "none", ...) { - type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", - "radial", "none")) - if(inherits(tree, "pml")) tree <- tree$tree - if(!inherits(tree, "phylo")) stop("tree must be of class phylo!") - method <- match.arg(method, c("FBP", "TBE", "MCC")) - if (hasArg(BStrees)) { - if(method=="FBP" || method=="MCC"){ - BStrees <- .uncompressTipLabel(BStrees) # check if needed - if(method=="MCC" && any(!is.rooted(BStrees))) - stop("All trees need to be rooted for method 'MCC'!") - if (method=="FBP" && any(is.rooted(BStrees))) BStrees <- unroot(BStrees) - x <- prop.clades(tree, BStrees) - x <- (x / length(BStrees)) * 100 - tree$node.label <- x - } - else { - tree <- transferBootstrap(tree, BStrees) - x <- tree$node.label - } - } - else { - if (is.null(tree$node.label)) stop("You need to supply 'trees' or the tree needs support-values as node.label") - x <- tree$node.label - } - if(type=="none") return( tree ) - plot(tree, type = type, ...) - - label <- c(rep(0, length(tree$tip.label)), x) - ind <- get("last_plot.phylo", envir = .PlotPhyloEnv)$edge[ ,2 ] - if (type == "phylogram" | type == "cladogram") { - root <- getRoot(tree) - label <- c(rep(0, length(tree$tip.label)), x) - label[root] <- 0 - ind <- which(label > p) - if (is.null(bs.adj)) { - bs.adj <- c(1, 1) - } - if (length(ind) > 0) { - if(is.numeric(label)) label <- round(label, digits = digits) - nodelabels( - text = label[ind], node = ind, - frame = frame, col = bs.col, adj = bs.adj, ... - ) - } - } - else { - if (is.null(bs.adj)) { - bs.adj <- c(0.5, 0.5) - } - ind2 <- which(label[ind] > p) - if (length(ind2 > 0)) { - if(is.numeric(label)) label <- round(label, digits = digits) - edgelabels(label[ind][ind2], ind2, - frame = frame, - col = bs.col, adj = bs.adj, ... - ) - } - } - invisible(tree) -} - - cladeMatrix <- function(x, rooted = FALSE) { if (!rooted) x <- unroot(x) pp <- prop.part(x) diff --git a/R/plotBS.R b/R/plotBS.R new file mode 100644 index 00000000..7e76648c --- /dev/null +++ b/R/plotBS.R @@ -0,0 +1,158 @@ +support <- function(tree, trees, method="FBP", tol=1e-8, scale=TRUE){ + trees <- keep.tip(trees, tree$tip.label) + method <- match.arg(method, c("FBP", "TBE", "MCC")) + tip2root <- ifelse(method=="MCC", TRUE, FALSE) + if(all(sapply(trees, \(x)!is.null(x$edge.length)))){ + trees <- di2multi(trees, tol=tol) # , tip2root=tip2root) + } + if(method=="FBP" || method=="MCC"){ + trees <- .uncompressTipLabel(trees) # check if needed + if(method=="MCC" && any(!is.rooted(trees))) + stop("All trees need to be rooted for method 'MCC'!") + if (method=="FBP" && any(is.rooted(trees))) trees <- unroot(trees) + x <- prop.clades(tree, trees) + x <- (x / length(trees)) + if(!scale) x <- x * 100 + } + else { + x <- transferBootstrap(tree, trees, FALSE, scale=scale) + } + x +} + + + +#' Plotting trees with bootstrap values +#' +#' \code{plotBS} plots a phylogenetic tree with the bootstrap values assigned +#' to the (internal) edges. It can also used to assign bootstrap values to a +#' phylogenetic tree. \code{add_support} adds support values to a plot. +#' +#' The functions can either assign the classical Felsenstein’s bootstrap +#' proportions (FBP) (Felsenstein (1985), Hendy & Penny (1985)) or the +#' transfer bootstrap expectation (TBE) of Lemoine et al. (2018). Using the +#' option \code{type=="n"} just assigns the bootstrap values and return the tree +#' without plotting it. +#' +#' +#' @param tree The tree on which edges the bootstrap values are plotted. +#' @param BStrees a list of trees (object of class "multiPhylo"). +#' @param type the type of tree to plot, one of "phylogram", "cladogram", "fan", +#' "unrooted", "radial" or "none". If type is "none" the tree is returned with +#' the bootstrap values assigned to the node labels. +#' @param method either "FBP" the classical bootstrap (default), "TBE" +#' (transfer bootstrap) or "MCC" for assigning clade credibilities. In case of +#' "MCC" all trees need to be rooted. +#' @param bs.col color of bootstrap support labels. +#' @param bs.adj one or two numeric values specifying the horizontal and +#' vertical justification of the bootstrap labels. +#' @param digits integer indicating the number of decimal places. +#' @param p only plot support values higher than this percentage number +#' (default is 0). +#' @param \dots further parameters used by \code{plot.phylo}. +#' @param frame a character string specifying the kind of frame to be printed +#' around the bootstrap values. This must be one of "none" (the default), +#' "rect" or "circle". +#' @param tol a numeric value giving the tolerance to consider a branch length +#' significantly greater than zero. +#' @return \code{plotBS} returns silently a tree, i.e. an object of class +#' \code{phylo} with the bootstrap values as node labels. The argument +#' \code{BStrees} is optional and if not supplied the labels supplied +#' in the \code{node.label} slot will be used. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link{plot.phylo}}, \code{\link{add_ci}}, +#' \code{\link{nodelabels}}, +#' \code{\link{prop.clades}}, \code{\link{maxCladeCred}}, +#' \code{\link{transferBootstrap}}, \code{\link{consensus}}, +#' \code{\link{consensusNet}} +#' @references Felsenstein J. (1985) Confidence limits on phylogenies. An +#' approach using the bootstrap. \emph{Evolution} \bold{39}, 783--791 +#' +#' Lemoine, F., Entfellner, J. B. D., Wilkinson, E., Correia, D., Felipe, M. D., +#' De Oliveira, T., & Gascuel, O. (2018). Renewing Felsenstein’s phylogenetic +#' bootstrap in the era of big data. \emph{Nature}, \bold{556(7702)}, 452--456. +#' +#' Penny D. and Hendy M.D. (1985) Testing methods evolutionary tree +#' construction. \emph{Cladistics} \bold{1}, 266--278 +#' +#' Penny D. and Hendy M.D. (1986) Estimating the reliability of evolutionary +#' trees. \emph{Molecular Biology and Evolution} \bold{3}, 403--417 +#' @examples +#' fdir <- system.file("extdata/trees", package = "phangorn") +#' # RAxML best-known tree with bipartition support (from previous analysis) +#' raxml.tree <- read.tree(file.path(fdir,"RAxML_bipartitions.woodmouse")) +#' # RAxML bootstrap trees (from previous analysis) +#' raxml.bootstrap <- read.tree(file.path(fdir,"RAxML_bootstrap.woodmouse")) +#' par(mfrow=c(1,2)) +#' plotBS(raxml.tree, raxml.bootstrap, "p") +#' plotBS(raxml.tree, raxml.bootstrap, "p", "TBE") +#' @rdname plotBS +#' @export +plotBS <- function(tree, BStrees, type = "phylogram", method="FBP", + bs.col = "black", bs.adj = NULL, digits=3, p = 0, + frame = "none", tol=1e-6, ...) { + type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", + "radial", "none")) + if(inherits(tree, "pml")) tree <- tree$tree + if(!inherits(tree, "phylo")) stop("tree must be of class phylo!") + method <- match.arg(method, c("FBP", "TBE", "MCC")) + + if (hasArg(BStrees)) { + x <-support(tree, BStrees, method="FBP", tol=tol) + tree$node.label <- x + } + else { + if (is.null(tree$node.label)) stop("You need to supply 'trees' or the tree needs support-values as node.label") +# x <- tree$node.label + } + if(type=="none") return( tree ) + plot(tree, type = type, ...) + + label <- c(rep(0, length(tree$tip.label)), x) + ind <- get("last_plot.phylo", envir = .PlotPhyloEnv)$edge[ ,2 ] + if (type == "phylogram" | type == "cladogram") { + root <- getRoot(tree) + label <- c(rep(0, length(tree$tip.label)), x) + label[root] <- 0 + ind <- which(label > p) + if (is.null(bs.adj)) { + bs.adj <- c(1, 1) + } + if (length(ind) > 0) { + if(is.numeric(label)) label <- round(label, digits = digits) + nodelabels( + text = label[ind], node = ind, + frame = frame, col = bs.col, adj = bs.adj, ... + ) + } + } + else { + if (is.null(bs.adj)) { + bs.adj <- c(0.5, 0.5) + } + ind2 <- which(label[ind] > p) + if (length(ind2 > 0)) { + if(is.numeric(label)) label <- round(label, digits = digits) + edgelabels(label[ind][ind2], ind2, + frame = frame, + col = bs.col, adj = bs.adj, ... + ) + } + } + invisible(tree) +} + + +#' @rdname plotBS +#' @export +add_support <- function(tree, trees,method="FBP", tol=1e-8, + scale=TRUE, frame="none", ...){ + lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) +# if(!all.equal(lastPP$edge, reorder(tree)$edge)) +# stop("tree seems to differ from the last plot!") + x <- support(tree, trees,method="FBP", tol=tol, scale=scale) + drawSupportOnEdges(x, frame=frame, ...) +} + + + diff --git a/man/plotBS.Rd b/man/plotBS.Rd index e862831c..959a1d67 100644 --- a/man/plotBS.Rd +++ b/man/plotBS.Rd @@ -1,12 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/bootstrap.R +% Please edit documentation in R/plotBS.R \name{plotBS} \alias{plotBS} +\alias{add_support} \title{Plotting trees with bootstrap values} \usage{ plotBS(tree, BStrees, type = "phylogram", method = "FBP", bs.col = "black", bs.adj = NULL, digits = 3, p = 0, frame = "none", - ...) + tol = 1e-06, ...) + +add_support(tree, trees, method = "FBP", tol = 1e-08, scale = TRUE, + frame = "none", ...) } \arguments{ \item{tree}{The tree on which edges the bootstrap values are plotted.} @@ -35,6 +39,9 @@ vertical justification of the bootstrap labels.} around the bootstrap values. This must be one of "none" (the default), "rect" or "circle".} +\item{tol}{a numeric value giving the tolerance to consider a branch length +significantly greater than zero.} + \item{\dots}{further parameters used by \code{plot.phylo}.} } \value{ @@ -46,10 +53,10 @@ in the \code{node.label} slot will be used. \description{ \code{plotBS} plots a phylogenetic tree with the bootstrap values assigned to the (internal) edges. It can also used to assign bootstrap values to a -phylogenetic tree. +phylogenetic tree. \code{add_support} adds support values to a plot. } \details{ -\code{plotBS} can either assign the classical Felsenstein’s bootstrap +The functions can either assign the classical Felsenstein’s bootstrap proportions (FBP) (Felsenstein (1985), Hendy & Penny (1985)) or the transfer bootstrap expectation (TBE) of Lemoine et al. (2018). Using the option \code{type=="n"} just assigns the bootstrap values and return the tree @@ -80,9 +87,10 @@ Penny D. and Hendy M.D. (1986) Estimating the reliability of evolutionary trees. \emph{Molecular Biology and Evolution} \bold{3}, 403--417 } \seealso{ -\code{\link{transferBootstrap}}, \code{\link{plot.phylo}}, -\code{\link{maxCladeCred}}, \code{\link{nodelabels}}, -\code{\link{consensus}}, \code{\link{consensusNet}} +\code{\link{plot.phylo}}, \code{\link{nodelabels}}, +\code{\link{prop.clades}}, \code{\link{maxCladeCred}}, +\code{\link{transferBootstrap}}, \code{\link{consensus}}, +\code{\link{consensusNet}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} From d44c9b32fe0edeb034a2b766d1d90e61f8079f1a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 4 Jul 2023 18:15:30 +0200 Subject: [PATCH 031/216] allow direction and rotating the plot --- R/networx.R | 83 ++++++++++++++++++++++++++++++++------------- man/plot.networx.Rd | 4 ++- man/plotBS.Rd | 3 +- 3 files changed, 64 insertions(+), 26 deletions(-) diff --git a/R/networx.R b/R/networx.R index befcab4f..f47d2519 100644 --- a/R/networx.R +++ b/R/networx.R @@ -499,7 +499,6 @@ coords.equal.angle <- function(obj) { coords <- function(obj, dim = "3D") { # if(is.null(attr(obj,"order")) || (attr(obj, "order")=="postorder") ) # obj = reorder.networx(obj) - if (dim == "equal_angle") return(coords.equal.angle(obj)) l <- length(obj$edge.length) @@ -509,7 +508,6 @@ coords <- function(obj, dim = "3D") { adj <- spMatrix(n, n, i = obj$edge[, 2], j = obj$edge[, 1], x = rep(1, length(obj$edge.length))) g <- graph_from_adjacency_matrix(adj, "undirected") - ########## # add this # g2 <- graph(t(obj$edge), directed=FALSE) # g2 <- set.edge.attribute(g, "weight", value=rep(1, nrow(obj$edge)) @@ -533,10 +531,12 @@ coords <- function(obj, dim = "3D") { else { coord <- layout_nicely(g, dim = 2) k <- matrix(0, max(obj$splitIndex), 2) - for (i in ind1) { - tmp <- coord[obj$edge[i, 2], ] - coord[obj$edge[i, 1], ] - k[obj$splitIndex[i], ] <- kart2kreis(tmp[1], tmp[2]) - } + tmp <- coord[obj$edge[ind1, 2], ] - coord[obj$edge[ind1, 1], ] + k[obj$splitIndex[ind1], ] <- kart2kreis(tmp[,1], tmp[,2]) +# for (i in ind1) { +# tmp <- coord[obj$edge[i, 2], ] - coord[obj$edge[i, 1], ] +# k[obj$splitIndex[i], ] <- kart2kreis(tmp[1], tmp[2]) +# } k[obj$splitIndex[ind1], 1] <- obj$edge.length[ind1] res <- matrix(0, vcount(g), 2) for (i in 1:l) { @@ -563,8 +563,9 @@ kart2kugel <- function(x, y, z) { kart2kreis <- function(x, y) { r <- sqrt(x * x + y * y) alpha <- atan(y / x) - if (x < 0) alpha <- alpha + pi - c(r, alpha) + #if (x < 0) alpha <- alpha + pi + if (any(x < 0)) alpha[x < 0] <- alpha[x < 0] + pi + cbind(r, alpha) } @@ -594,6 +595,13 @@ edgeLabels <- function(xx, yy, zz = NULL, edge) { } +rotate_matrix <- function(x, theta){ + rot_matrix <- matrix(c(cos(theta), sin(theta), -sin(theta), cos(theta)), + 2, 2, byrow = TRUE) + x %*% rot_matrix +} + + #' plot phylogenetic networks #' #' So far not all parameters behave the same on the the \code{rgl} \code{"3D"} @@ -637,6 +645,7 @@ edgeLabels <- function(xx, yy, zz = NULL, edge) { #' @param font.edge.label the font used for the edge labels. #' @param underscore a logical specifying whether the underscores in tip labels #' should be written as spaces (the default) or left as are (if TRUE). +#' @param angle rotate the plot. #' @param \dots Further arguments passed to or from other methods. #' @rdname plot.networx #' @note The internal representation is likely to change. @@ -679,7 +688,8 @@ plot.networx <- function(x, type = "equal angle", use.edge.length = TRUE, cex = par("cex"), cex.node.label = cex, cex.edge.label = cex, col.node.label = tip.color, col.edge.label = tip.color, font.node.label = font, - font.edge.label = font, underscore = FALSE, ...) { + font.edge.label = font, underscore = FALSE, + angle=0, ...) { type <- match.arg(type, c("equal angle", "3D", "2D")) if (use.edge.length == FALSE){ x$edge.length[] <- 1 @@ -746,6 +756,10 @@ plot.networx <- function(x, type = "equal angle", use.edge.length = TRUE, if (type == "equal angle") coord <- coords.equal.angle(x) else coord <- coords(x, dim = "2D") } + if(angle != 0){ + angle <- angle * pi/180 # + coord <- rotate_matrix(coord, angle) + } plot2D(coord, x, show.tip.label = show.tip.label, show.edge.label = show.edge.label, edge.label = edge.label, show.node.label = show.node.label, node.label = node.label, @@ -754,7 +768,7 @@ plot.networx <- function(x, type = "equal angle", use.edge.length = TRUE, cex.node.label = cex.node.label, cex.edge.label = cex.edge.label, col.node.label = col.node.label, col.edge.label = col.edge.label, font.node.label = font.node.label, font.edge.label = font.edge.label, - add = FALSE) + add = FALSE, ...) } x$.plot <- list(vertices = coord, edge.color = edge.color, edge.width = edge.width, edge.lty = edge.lty) @@ -827,7 +841,7 @@ plot2D <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, cex.node.label = cex, cex.edge.label = cex, col.node.label = tip.color, col.edge.label = tip.color, font.node.label = font, font.edge.label = font, - add = FALSE, ...) { + add = FALSE, direction="horizontal", ...) { edge <- net$edge label <- net$tip.label xx <- coords[, 1] @@ -836,7 +850,7 @@ plot2D <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, xlim <- range(xx) ylim <- range(yy) - + direction <- match.arg(direction, c("horizontal", "axial")) if (show.tip.label) { offset <- max(nchar(label)) * 0.018 * cex * diff(xlim) xlim <- c(xlim[1] - offset, xlim[2] + offset) @@ -851,21 +865,42 @@ plot2D <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, if (show.tip.label) { if (is.null(net$translate)) ind <- match(1:nTips, edge[, 2]) else ind <- match(net$translate$node, edge[, 2]) - pos <- rep(4, nTips) - XX <- xx[edge[ind, 1]] - xx[edge[ind, 2]] - pos[XX > 0] <- 2 - YY <- yy[edge[ind, 1]] - yy[edge[ind, 2]] - pos2 <- rep(3, nTips) - pos2[YY > 0] <- 1 - # needed if tiplabels are not at internal nodes - XX[is.na(XX)] <- 0 - YY[is.na(YY)] <- 0 - pos[abs(YY) > abs(XX)] <- pos2[abs(YY) > abs(XX)] - if (is.null(net$translate)) text(xx[1:nTips], yy[1:nTips], labels = label, + if(direction=="horizontal"){ + pos <- rep(4, nTips) + XX <- xx[edge[ind, 1]] - xx[edge[ind, 2]] + pos[XX > 0] <- 2 + YY <- yy[edge[ind, 1]] - yy[edge[ind, 2]] + pos2 <- rep(3, nTips) + pos2[YY > 0] <- 1 + # needed if tiplabels are not at internal nodes + XX[is.na(XX)] <- 0 + YY[is.na(YY)] <- 0 + pos[abs(YY) > abs(XX)] <- pos2[abs(YY) > abs(XX)] + if (is.null(net$translate)) text(xx[1:nTips], yy[1:nTips], labels = label, pos = pos, col = tip.color, cex = cex, font = font) - else text(xx[net$translate$node], yy[net$translate$node], labels = label, + else text(xx[net$translate$node], yy[net$translate$node], labels = label, pos = pos, col = tip.color, cex = cex, font = font) + } + else { + XX <- xx[edge[ind, 2]] - xx[edge[ind, 1]] + YY <- yy[edge[ind, 2]] - yy[edge[ind, 1]] + angle <- kart2kreis(XX, YY)[,2] + adj <- abs(angle) > pi/2 + angle <- angle * 180/pi # switch to degrees + angle[adj] <- angle[adj] - 180 + adj <- as.numeric(adj) + ## `srt' takes only a single value, so can't vectorize this: + ## (and need to 'elongate' these vectors:) + font <- rep(font, length.out = Ntip) + tip.color <- rep(tip.color, length.out = Ntip) + cex <- rep(cex, length.out = Ntip) + for (i in 1:length(label)) + text(xx[i], yy[i], label[i], font = font[i], + cex = cex[i], srt = angle[i], adj = adj[i], + col = tip.color[i]) + } } + if (show.edge.label) { ec <- edgeLabels(xx, yy, edge = edge) if (is.null(edge.label)) edge.label <- net$splitIndex diff --git a/man/plot.networx.Rd b/man/plot.networx.Rd index b10eab0f..5f86bbc4 100644 --- a/man/plot.networx.Rd +++ b/man/plot.networx.Rd @@ -12,7 +12,7 @@ split.lty = NULL, font = 3, cex = par("cex"), cex.node.label = cex, cex.edge.label = cex, col.node.label = tip.color, col.edge.label = tip.color, font.node.label = font, - font.edge.label = font, underscore = FALSE, ...) + font.edge.label = font, underscore = FALSE, angle = 0, ...) } \arguments{ \item{x}{an object of class \code{"networx"}} @@ -75,6 +75,8 @@ labels.} \item{underscore}{a logical specifying whether the underscores in tip labels should be written as spaces (the default) or left as are (if TRUE).} +\item{angle}{rotate the plot.} + \item{\dots}{Further arguments passed to or from other methods.} } \description{ diff --git a/man/plotBS.Rd b/man/plotBS.Rd index 959a1d67..9b8199c1 100644 --- a/man/plotBS.Rd +++ b/man/plotBS.Rd @@ -87,7 +87,8 @@ Penny D. and Hendy M.D. (1986) Estimating the reliability of evolutionary trees. \emph{Molecular Biology and Evolution} \bold{3}, 403--417 } \seealso{ -\code{\link{plot.phylo}}, \code{\link{nodelabels}}, +\code{\link{plot.phylo}}, \code{\link{add_ci}}, +\code{\link{nodelabels}}, \code{\link{prop.clades}}, \code{\link{maxCladeCred}}, \code{\link{transferBootstrap}}, \code{\link{consensus}}, \code{\link{consensusNet}} From 242b63d353c4bb990fd7d4f69fcb9244f67494d0 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 4 Jul 2023 18:19:14 +0200 Subject: [PATCH 032/216] add keep.as.tip --- NAMESPACE | 2 ++ R/transferBootstrap.R | 6 ++++-- R/treeManipulation.R | 11 +++++++++++ man/midpoint.Rd | 5 +++++ man/transferBootstrap.Rd | 4 +++- 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2a772a16..561ba49b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -97,6 +97,7 @@ export(addTrivialSplits) export(add_boxplot) export(add_ci) export(add_edge_length) +export(add_support) export(allCircularSplits) export(allCompat) export(allDescendants) @@ -155,6 +156,7 @@ export(h2st) export(h4st) export(hadamard) export(hash) +export(keep.as.tip) export(ldfactorial) export(lento) export(lli) diff --git a/R/transferBootstrap.R b/R/transferBootstrap.R index a2fc9181..d83ceb0e 100644 --- a/R/transferBootstrap.R +++ b/R/transferBootstrap.R @@ -8,6 +8,7 @@ #' @param BStrees a list of trees (object of class "multiPhylo"). #' @param phylo Logical, return a phylogentic tree with support value or a #' vector of bootstrap values. +#' @param scale scale the values. #' @return \code{plotBS} returns silently a tree, i.e. an object of class #' \code{phylo} with the bootstrap values as node labels. The argument #' \code{BSTrees} is optional and if not supplied the labels supplied @@ -32,7 +33,7 @@ #' # same as #' plotBS(raxml.tree, raxml.bootstrap, "p", "TBE") #' @export -transferBootstrap <- function(tree, BStrees, phylo=TRUE){ +transferBootstrap <- function(tree, BStrees, phylo=TRUE, scale=TRUE){ if(!inherits(BStrees, "multiPhylo")) stop("BSTrees needs to be of class multiPhylo!") BStrees <- .uncompressTipLabel(BStrees) @@ -53,7 +54,8 @@ transferBootstrap <- function(tree, BStrees, phylo=TRUE){ ind <- which(is.na(ind) & not_cherry) for(j in ind) res[j] <- res[j] + Transfer_Index(bp[[j]], tmp$edge, l) } - res <- res / length(BStrees) * 100 + res <- res / length(BStrees) + if(! scale) res <- res * 100 res <- c(NA_real_, res) if(!phylo) return(res) tree$node.label <- res diff --git a/R/treeManipulation.R b/R/treeManipulation.R index b0207a82..286ca9d8 100644 --- a/R/treeManipulation.R +++ b/R/treeManipulation.R @@ -858,3 +858,14 @@ relabel <- function(y, ref) { y$tip.label <- ref y } + +#' @rdname midpoint +#' @param labels tip and node labels to keep as tip labels in the tree +#' @export +keep.as.tip<- function(tree, labels){ + nodes_to_keep <- labels[!is.na(match(labels, tree$node.label))] + tips_to_remove <- Descendants(tree, nodes_to_keep) |> unlist() |> unique() + tree_1 <- drop.tip(tree, tips_to_remove, subtree = TRUE) + tree_2 <- keep.tip(tree_1, tips_and_nodes_to_keep, collapse.singles=FALSE) + tree_2 +} diff --git a/man/midpoint.Rd b/man/midpoint.Rd index e3a98b06..3341064a 100644 --- a/man/midpoint.Rd +++ b/man/midpoint.Rd @@ -6,6 +6,7 @@ \alias{midpoint.phylo} \alias{midpoint.multiPhylo} \alias{pruneTree} +\alias{keep.as.tip} \title{Tree manipulation} \usage{ getRoot(tree) @@ -17,6 +18,8 @@ midpoint(tree, node.labels = "support", ...) \method{midpoint}{multiPhylo}(tree, node.labels = "support", ...) pruneTree(tree, ..., FUN = ">=") + +keep.as.tip(tree, labels) } \arguments{ \item{tree}{an object of class \code{phylo}.} @@ -27,6 +30,8 @@ should labels get 'deleted'?} \item{\dots}{further arguments, passed to other methods.} \item{FUN}{a function evaluated on the nodelabels, result must be logical.} + +\item{labels}{tip and node labels to keep as tip labels in the tree} } \value{ \code{pruneTree} and \code{midpoint} a tree. \code{getRoot} returns diff --git a/man/transferBootstrap.Rd b/man/transferBootstrap.Rd index c62a2af8..d54eed1c 100644 --- a/man/transferBootstrap.Rd +++ b/man/transferBootstrap.Rd @@ -4,7 +4,7 @@ \alias{transferBootstrap} \title{Transfer Bootstrap} \usage{ -transferBootstrap(tree, BStrees, phylo = TRUE) +transferBootstrap(tree, BStrees, phylo = TRUE, scale = TRUE) } \arguments{ \item{tree}{The tree on which edges the bootstrap values are plotted.} @@ -13,6 +13,8 @@ transferBootstrap(tree, BStrees, phylo = TRUE) \item{phylo}{Logical, return a phylogentic tree with support value or a vector of bootstrap values.} + +\item{scale}{scale the values.} } \value{ \code{plotBS} returns silently a tree, i.e. an object of class From 4e29500ee5b9e118b5de38ee57eb44446462e6de Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 4 Jul 2023 18:20:10 +0200 Subject: [PATCH 033/216] small improvements --- R/plot_pml.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/plot_pml.R b/R/plot_pml.R index 86fe607d..b6265966 100644 --- a/R/plot_pml.R +++ b/R/plot_pml.R @@ -38,8 +38,10 @@ plot.pml <- function(x, type="phylogram", direction = "rightwards", ...){ type <- match.arg(type, c("phylogram","cladogram", "fan", "unrooted", "radial", "tidy")) - plot.phylo(x$tree, type=type, direction=direction, ...) - if(is.rooted(x$tree) && (type %in% c("phylogram","cladogram"))){ + tree <- x$tree + if(!is.rooted(tree) && (type != "unrooted") ) tree <- midpoint(tree) + plot.phylo(tree, type=type, direction=direction, ...) + if(is.rooted(tree) && (type %in% c("phylogram","cladogram"))){ direction <- match.arg(direction, c("rightwards", "leftwards", "upwards", "downwards")) side <- switch(direction, @@ -54,4 +56,5 @@ plot.pml <- function(x, type="phylogram", direction = "rightwards", ...){ else axisPhylo(side) } else add.scale.bar() + if(!is.null(x$bs)) add_support(tree, x$bs) } From 97c9e244f7da5782c62c1603db942cae42407a6b Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 4 Jul 2023 21:44:08 +0200 Subject: [PATCH 034/216] bugfix --- R/plotBS.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plotBS.R b/R/plotBS.R index 7e76648c..142ba8b4 100644 --- a/R/plotBS.R +++ b/R/plotBS.R @@ -103,7 +103,7 @@ plotBS <- function(tree, BStrees, type = "phylogram", method="FBP", } else { if (is.null(tree$node.label)) stop("You need to supply 'trees' or the tree needs support-values as node.label") -# x <- tree$node.label + x <- tree$node.label } if(type=="none") return( tree ) plot(tree, type = type, ...) From f09bcbdbfe968862c09faf12766bff5c22a998d0 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 4 Jul 2023 22:13:16 +0200 Subject: [PATCH 035/216] bug fix --- R/plotBS.R | 11 ++++++----- man/plotBS.Rd | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/R/plotBS.R b/R/plotBS.R index 142ba8b4..cf2fe8ec 100644 --- a/R/plotBS.R +++ b/R/plotBS.R @@ -36,7 +36,7 @@ support <- function(tree, trees, method="FBP", tol=1e-8, scale=TRUE){ #' #' #' @param tree The tree on which edges the bootstrap values are plotted. -#' @param BStrees a list of trees (object of class "multiPhylo"). +#' @param trees a list of trees (object of class "multiPhylo"). #' @param type the type of tree to plot, one of "phylogram", "cladogram", "fan", #' "unrooted", "radial" or "none". If type is "none" the tree is returned with #' the bootstrap values assigned to the node labels. @@ -55,9 +55,10 @@ support <- function(tree, trees, method="FBP", tol=1e-8, scale=TRUE){ #' "rect" or "circle". #' @param tol a numeric value giving the tolerance to consider a branch length #' significantly greater than zero. +#' @param scale return ratio or percentage. #' @return \code{plotBS} returns silently a tree, i.e. an object of class #' \code{phylo} with the bootstrap values as node labels. The argument -#' \code{BStrees} is optional and if not supplied the labels supplied +#' \code{trees} is optional and if not supplied the labels supplied #' in the \code{node.label} slot will be used. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{plot.phylo}}, \code{\link{add_ci}}, @@ -88,7 +89,7 @@ support <- function(tree, trees, method="FBP", tol=1e-8, scale=TRUE){ #' plotBS(raxml.tree, raxml.bootstrap, "p", "TBE") #' @rdname plotBS #' @export -plotBS <- function(tree, BStrees, type = "phylogram", method="FBP", +plotBS <- function(tree, trees, type = "phylogram", method="FBP", bs.col = "black", bs.adj = NULL, digits=3, p = 0, frame = "none", tol=1e-6, ...) { type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", @@ -97,8 +98,8 @@ plotBS <- function(tree, BStrees, type = "phylogram", method="FBP", if(!inherits(tree, "phylo")) stop("tree must be of class phylo!") method <- match.arg(method, c("FBP", "TBE", "MCC")) - if (hasArg(BStrees)) { - x <-support(tree, BStrees, method="FBP", tol=tol) + if (hasArg(trees)) { + x <-support(tree, trees, method="FBP", tol=tol) tree$node.label <- x } else { diff --git a/man/plotBS.Rd b/man/plotBS.Rd index 9b8199c1..2da59af9 100644 --- a/man/plotBS.Rd +++ b/man/plotBS.Rd @@ -5,9 +5,8 @@ \alias{add_support} \title{Plotting trees with bootstrap values} \usage{ -plotBS(tree, BStrees, type = "phylogram", method = "FBP", - bs.col = "black", bs.adj = NULL, digits = 3, p = 0, frame = "none", - tol = 1e-06, ...) +plotBS(tree, trees, type = "phylogram", method = "FBP", bs.col = "black", + bs.adj = NULL, digits = 3, p = 0, frame = "none", tol = 1e-06, ...) add_support(tree, trees, method = "FBP", tol = 1e-08, scale = TRUE, frame = "none", ...) @@ -15,7 +14,7 @@ add_support(tree, trees, method = "FBP", tol = 1e-08, scale = TRUE, \arguments{ \item{tree}{The tree on which edges the bootstrap values are plotted.} -\item{BStrees}{a list of trees (object of class "multiPhylo").} +\item{trees}{a list of trees (object of class "multiPhylo").} \item{type}{the type of tree to plot, one of "phylogram", "cladogram", "fan", "unrooted", "radial" or "none". If type is "none" the tree is returned with @@ -43,11 +42,13 @@ around the bootstrap values. This must be one of "none" (the default), significantly greater than zero.} \item{\dots}{further parameters used by \code{plot.phylo}.} + +\item{scale}{return ratio or percentage.} } \value{ \code{plotBS} returns silently a tree, i.e. an object of class \code{phylo} with the bootstrap values as node labels. The argument -\code{BStrees} is optional and if not supplied the labels supplied +\code{trees} is optional and if not supplied the labels supplied in the \code{node.label} slot will be used. } \description{ From 992eb1830920fe0e812674874e56025fe5e61fb4 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 4 Jul 2023 22:16:52 +0200 Subject: [PATCH 036/216] bugfix --- R/treeManipulation.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/treeManipulation.R b/R/treeManipulation.R index 286ca9d8..055b1c74 100644 --- a/R/treeManipulation.R +++ b/R/treeManipulation.R @@ -866,6 +866,6 @@ keep.as.tip<- function(tree, labels){ nodes_to_keep <- labels[!is.na(match(labels, tree$node.label))] tips_to_remove <- Descendants(tree, nodes_to_keep) |> unlist() |> unique() tree_1 <- drop.tip(tree, tips_to_remove, subtree = TRUE) - tree_2 <- keep.tip(tree_1, tips_and_nodes_to_keep, collapse.singles=FALSE) + tree_2 <- keep.tip(tree_1, labels, collapse.singles=FALSE) tree_2 } From f98a1cf8599561e24630f0819fb975110565a218 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 4 Jul 2023 22:21:55 +0200 Subject: [PATCH 037/216] use experimental ape --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 687898f6..b12d0fc2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,7 +67,7 @@ Suggests: xtable LinkingTo: Rcpp, RcppArmadillo -Remotes: github::emmanuelparadis/ape +Remotes: github::KlausVigo/ape VignetteBuilder: knitr, utils From 0f70a4e496396bc5792f07b0fc25f2bed966a9fb Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 6 Jul 2023 13:32:37 +0200 Subject: [PATCH 038/216] change to ape develoment --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b12d0fc2..687898f6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,7 +67,7 @@ Suggests: xtable LinkingTo: Rcpp, RcppArmadillo -Remotes: github::KlausVigo/ape +Remotes: github::emmanuelparadis/ape VignetteBuilder: knitr, utils From e5b05e8aebbe66e36f5903153884952f0f24fe0a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 11 Jul 2023 11:20:28 +0200 Subject: [PATCH 039/216] update --- NEWS | 2 ++ R/consensusNet.R | 2 +- man/consensusNet.Rd | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 16f04e3f..88757d3e 100644 --- a/NEWS +++ b/NEWS @@ -20,6 +20,8 @@ OTHER CHANGES o nicer defaults for plot.pml, mainly for rooted trees. + o plot.networx gets two additional arguments direction and angle. + o the sankoff algorithm has been rewritten. o functions Descendants, Ancestors, Siblings, mrca.phy now also accept a diff --git a/R/consensusNet.R b/R/consensusNet.R index 19ff59f5..169d976d 100644 --- a/R/consensusNet.R +++ b/R/consensusNet.R @@ -27,7 +27,7 @@ #' bs <- bootstrap.phyDat(Laurasiatherian, FUN = function(x)nj(dist.hamming(x)), #' bs=50) #' cnet <- consensusNet(bs, .3) -#' plot(cnet) +#' plot(cnet, angle=-60, direction="axial") #' \dontrun{ #' library(rgl) #' open3d() diff --git a/man/consensusNet.Rd b/man/consensusNet.Rd index 225e169e..001f15f3 100644 --- a/man/consensusNet.Rd +++ b/man/consensusNet.Rd @@ -31,7 +31,7 @@ set.seed(1) bs <- bootstrap.phyDat(Laurasiatherian, FUN = function(x)nj(dist.hamming(x)), bs=50) cnet <- consensusNet(bs, .3) -plot(cnet) +plot(cnet, angle=-60, direction="axial") \dontrun{ library(rgl) open3d() From 3ed5bafdd5321222ecf564832a5dbb040e2d2776 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 11 Jul 2023 11:21:21 +0200 Subject: [PATCH 040/216] noicer printing --- R/networx.R | 4 +++- R/splitsNetwork.R | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/networx.R b/R/networx.R index f47d2519..137df955 100644 --- a/R/networx.R +++ b/R/networx.R @@ -698,8 +698,10 @@ plot.networx <- function(x, type = "equal angle", use.edge.length = TRUE, nTips <- length(x$tip.label) conf <- attr(x$splits, "confidences") index <- x$splitIndex - if (is.null(edge.label) & !is.null(conf)) { + if(!is.null(edge.label) && is.numeric(edge.label)) edge.label <- prettyNum(edge.label) + if (is.null(edge.label) && !is.null(conf)) { conf <- conf[index] + if(is.numeric(conf)) conf <- prettyNum(conf) if (!is.null(x$translate)) conf[match(x$translate$node, x$edge[, 2])] <- "" else conf[x$edge[, 2] <= nTips] <- "" edge.label <- conf diff --git a/R/splitsNetwork.R b/R/splitsNetwork.R index b02df413..9d9eda08 100644 --- a/R/splitsNetwork.R +++ b/R/splitsNetwork.R @@ -51,7 +51,7 @@ splitsNetwork <- function(dm, splits = NULL, gamma = .1, lambda = 1e-6, if (!is.null(splits)) { tmp <- which(lengths(splits) == k) - splits <- splits[-tmp] + if(length(tmp)>0) splits <- splits[-tmp] lab <- attr(splits, "labels") dm <- dm[lab, lab] } From ec9d8cb3aac4fa7453aee04a828481ee9dcf7e39 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 17 Jul 2023 16:52:21 +0200 Subject: [PATCH 041/216] consensus trees might not be seen as rooted if root is not resolved --- R/draw_CI.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/draw_CI.R b/R/draw_CI.R index 934392f3..c4d78dff 100644 --- a/R/draw_CI.R +++ b/R/draw_CI.R @@ -6,7 +6,7 @@ edge_length_matrix <- function(tree, trees, rooted=TRUE){ tree <- unroot(tree) } else{ - if(!is.rooted(tree) || any(!is.rooted(trees))) stop("All trees need to be rooted!") + if(any(!is.rooted(trees))) stop("All trees need to be rooted!") } fun <- function(x){ el <- numeric(max(x$edge)) @@ -59,7 +59,7 @@ edge_length_matrix <- function(tree, trees, rooted=TRUE){ ##' @keywords aplot ##' @export add_edge_length <- function(tree, trees, fun=\(x)median(na.omit(x)), - rooted=all(is.rooted(c(tree, trees)))){ + rooted=all(is.rooted(trees))){ if(!rooted) tree <- unroot(tree) X <- edge_length_matrix(tree, trees, rooted) nh <- apply(X, 2, fun) From 30ffef7696df4eceeb640bde3f0679e9ced25400 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 17 Jul 2023 17:15:08 +0200 Subject: [PATCH 042/216] direct conversion of AAbin objects, uses less memory --- R/phyDat2.R | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/R/phyDat2.R b/R/phyDat2.R index 763fb5b7..4d267e48 100644 --- a/R/phyDat2.R +++ b/R/phyDat2.R @@ -172,10 +172,12 @@ phyDat.AA <- function (data, return.index = TRUE){ # AAbin if (inherits(data,"AAbin")){ if(is.list(data)) data <- as.matrix(data) - data <- as.character(data) + data <- as.character(data) # not needed + } + if(inherits(data, "character")){ + data <- as.matrix(data) + data <- toupper(data) } - if(inherits(data, "character")) data <- as.matrix(data) - data <- toupper(data) aa <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V") aa2 <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", @@ -203,9 +205,22 @@ phyDat.AA <- function (data, return.index = TRUE){ n <- nrow(data) res <- vector("list", n) ind_na <- logical(length(weight)) - for(i in seq_len(n)){ - res[[i]] <- match(data[i, ind], aa2) - ind_na[is.na(res[[i]])] <- TRUE + + if(inherits(data,"AAbin")){ + cs <- rep(NA_integer_, 256) + # + cs[as.integer (sapply( aa2 , charToRaw) )] <- seq_along(aa2) + cs[as.integer (sapply( tolower(aa2) , charToRaw) )] <- seq_along(aa2) + for(i in seq_len(n)){ + res[[i]] <- cs[ as.integer(data[i, ind]) ] + # maybe check for NAs + # ind_na[is.na(res[[i]])] <- TRUE + } + } else { + for(i in seq_len(n)){ + res[[i]] <- match(data[i, ind], aa2) + ind_na[is.na(res[[i]])] <- TRUE + } } if(any(ind_na)){ warning("Found unknown characters (not supplied in levels). Deleted sites with unknown states.") @@ -239,9 +254,10 @@ phyDat.codon <- function (data, return.index = TRUE, ambiguity = "---", data <- as.matrix(data) data <- tolower(data) } - if (inherits(data,"DNAbin") || inherits(data, "phyDat")) + if (inherits(data,"DNAbin") || inherits(data, "phyDat")){ + if(is.list(data) && inherits(data, "phyDat")) data <- as.matrix(data) data <- as.character(data) - + } data[data=="u"] <- "t" stopcodon <- match.arg(stopcodon, c("exclude", "include")) @@ -257,7 +273,7 @@ phyDat.codon <- function (data, return.index = TRUE, ambiguity = "---", if(stopcodon!="include") codon <- codon[tmp != "*"] # no stop codons if(stopcodon=="exclude"){ - for(i in stopcodon){ + for(i in stop_codons){ data[data==i] <- NA } rm_stop <- which(is.na(data), arr.ind = TRUE)[,1] @@ -324,3 +340,9 @@ phyDat.codon <- function (data, return.index = TRUE, ambiguity = "---", res } + +## AA: add toupper to default +## DNA: add tolower, DNAbin to data +## default: create contrast from levels +## CODON: create contrast +## From 732e37adecde318fa30dfee6fdfae7b21936efef Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 18 Jul 2023 12:00:33 +0200 Subject: [PATCH 043/216] use node labels or add them --- R/ancestral_pml.R | 42 +++++++++++++++++++++++++++++------------- man/ancestral.pml.Rd | 15 +++++++++------ 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R index 5e8ef258..99c6f379 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral_pml.R @@ -2,7 +2,6 @@ # ancestral sequences ML # - #' Ancestral character reconstruction. #' #' Marginal reconstruction of the ancestral character states. @@ -14,6 +13,9 @@ #' With parsimony reconstruction one has to keep in mind that there will be #' often no unique solution. #' +#' The functions use node labels if these are present and unique. Otherwise the +#' function \code{ape::MakeNodeLabel} is used to create them. +#' #' For further details see vignette("Ancestral"). #' #' @param object an object of class pml @@ -31,7 +33,7 @@ #' of class "phyDat", containing the ancestral states of all nodes. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, -#' \code{\link[ape]{root}} +#' \code{\link[ape]{root}}, \code{\link[ape]{makeNodeLabel}} #' @references Felsenstein, J. (2004). \emph{Inferring Phylogenies}. Sinauer #' Associates, Sunderland. #' @@ -60,7 +62,7 @@ #' #' @rdname ancestral.pml #' @export -ancestral.pml <- function(object, type = "marginal", return = "prob") { +ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { call <- match.call() pt <- match.arg(type, c("marginal", "joint", "ml", "bayes")) tree <- object$tree @@ -85,9 +87,10 @@ ancestral.pml <- function(object, type = "marginal", return = "prob") { dim(dat) <- c(l, m) x <- attributes(data) - label <- as.character(1:m) - nam <- tree$tip.label - label[seq_along(nam)] <- nam + label <- makeAncNodeLabel(tree, ...) +# label <- as.character(1:m) +# nam <- tree$tip.label +# label[seq_along(nam)] <- nam x[["names"]] <- label tmp <- length(data) @@ -188,7 +191,7 @@ fitchCoding2ambiguous <- function(x, type = "DNA") { #' @rdname ancestral.pml #' @export ancestral.pars <- function(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), - cost = NULL, return = "prob") { + cost = NULL, return = "prob", ...) { call <- match.call() type <- match.arg(type) if (type == "ACCTRAN" || type=="POSTORDER") { @@ -233,7 +236,7 @@ mpr.help <- function(tree, data, cost = NULL) { } -mpr <- function(tree, data, cost = NULL, return = "prob") { +mpr <- function(tree, data, cost = NULL, return = "prob", ...) { data <- subset(data, tree$tip.label) att <- attributes(data) type <- att$type @@ -242,9 +245,10 @@ mpr <- function(tree, data, cost = NULL, return = "prob") { res <- mpr.help(tree, data, cost) l <- length(tree$tip.label) m <- length(res) - label <- as.character(1:m) - nam <- tree$tip.label - label[seq_along(nam)] <- nam + label <- makeAncNodeLabel(tree, ...) +# label <- as.character(1:m) +# nam <- tree$tip.label +# label[seq_along(nam)] <- nam att[["names"]] <- label ntips <- length(tree$tip.label) contrast <- att$contrast @@ -363,7 +367,7 @@ acctran <- function(tree, data) { } -ptree <- function(tree, data, return = "prob", acctran=TRUE) { +ptree <- function(tree, data, return = "prob", acctran=TRUE, ...) { tree <- reorder(tree, "postorder") data <- subset(data, tree$tip.label) edge <- tree$edge @@ -378,7 +382,8 @@ ptree <- function(tree, data, return = "prob", acctran=TRUE) { tmp <- tmp[tmp[,2]>Ntip(tree),] if(length(tmp)>0 && acctran==TRUE)f$acctran_traverse(tmp) res <- vector("list", m) - att$names <- c(att$names, as.character((nTip+1):m)) + att$names <- makeAncNodeLabel(tree, ...) +# att$names <- c(att$names, as.character((nTip+1):m)) if(return == "phyDat"){ if(type=="DNA"){ indx <- c(1, 2, 6, 3, 7, 9, 12, 4, 8, 10, 13, 11, 14, 15, 16) @@ -402,6 +407,17 @@ ptree <- function(tree, data, return = "prob", acctran=TRUE) { res } + +makeAncNodeLabel <- function(tree, ...){ + if(!is.null(tree$node.label)){ + node_label <- tree$node.label + if(length(unique(node_label)) == Nnode(tree)) return(c(tree$tip.label, node_label)) + else warning("Node labels are not unique, used makeNodeLabels(tree, ...) to create them!") + } + nodel_label <- makeNodeLabel(tree, ...) + c(tree$tip.label, node_label) +} + #parsimony.plot <- function(tree, ...) { # x <- numeric(max(tree$edge)) # x[tree$edge[, 2]] <- tree$edge.length diff --git a/man/ancestral.pml.Rd b/man/ancestral.pml.Rd index 96c59f2b..96cc3edc 100644 --- a/man/ancestral.pml.Rd +++ b/man/ancestral.pml.Rd @@ -7,13 +7,13 @@ \alias{plotAnc} \title{Ancestral character reconstruction.} \usage{ -ancestral.pml(object, type = "marginal", return = "prob") +ancestral.pml(object, type = "marginal", return = "prob", ...) ancestral.pars(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), - cost = NULL, return = "prob") + cost = NULL, return = "prob", ...) pace(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, - return = "prob") + return = "prob", ...) plotAnc(tree, data, i = 1, site.pattern = TRUE, col = NULL, cex.pie = par("cex"), pos = "bottomright", ...) @@ -25,6 +25,8 @@ plotAnc(tree, data, i = 1, site.pattern = TRUE, col = NULL, \item{return}{return a \code{phyDat} object or matrix of probabilities.} +\item{\dots}{Further arguments passed to or from other methods.} + \item{tree}{a tree, i.e. an object of class pml} \item{data}{an object of class phyDat} @@ -40,8 +42,6 @@ plotAnc(tree, data, i = 1, site.pattern = TRUE, col = NULL, \item{cex.pie}{a numeric defining the size of the pie graphs} \item{pos}{a character string defining the position of the legend} - -\item{\dots}{Further arguments passed to or from other methods.} } \value{ %A matrix containing the the estimates character states. An object @@ -58,6 +58,9 @@ The argument "type" defines the criterion to assign the internal nodes. For With parsimony reconstruction one has to keep in mind that there will be often no unique solution. +The functions use node labels if these are present and unique. Otherwise the +function \code{ape::MakeNodeLabel} is used to create them. + For further details see vignette("Ancestral"). } \examples{ @@ -89,7 +92,7 @@ Press, Oxford. } \seealso{ \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, -\code{\link[ape]{root}} +\code{\link[ape]{root}}, \code{\link[ape]{makeNodeLabel}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} From 9b04b5af342cdfd57a0c8b4f8ea119a8e89a3ab5 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 18 Jul 2023 16:09:20 +0200 Subject: [PATCH 044/216] bugfix --- R/ancestral_pml.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R index 99c6f379..ae45d383 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral_pml.R @@ -412,10 +412,10 @@ makeAncNodeLabel <- function(tree, ...){ if(!is.null(tree$node.label)){ node_label <- tree$node.label if(length(unique(node_label)) == Nnode(tree)) return(c(tree$tip.label, node_label)) - else warning("Node labels are not unique, used makeNodeLabels(tree, ...) to create them!") + else message("Node labels are not unique, used makeNodeLabels(tree, ...) to create them!") } - nodel_label <- makeNodeLabel(tree, ...) - c(tree$tip.label, node_label) + tree <- makeNodeLabel(tree, ...) + c(tree$tip.label, tree$node.label) } #parsimony.plot <- function(tree, ...) { From dba93cc9420f3384e84e663f2052a938d87dcb04 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 18 Jul 2023 16:33:31 +0200 Subject: [PATCH 045/216] no unnecessary output --- inst/tinytest/test_bootstrap.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/inst/tinytest/test_bootstrap.R b/inst/tinytest/test_bootstrap.R index 3959fb6d..1f639222 100644 --- a/inst/tinytest/test_bootstrap.R +++ b/inst/tinytest/test_bootstrap.R @@ -22,8 +22,9 @@ expect_true(is.null(attr(nnet$splits, "confidences"))) expect_false(is.null(attr(nnet2$splits, "confidences"))) dat <- Laurasiatherian[sample(47, 10)] |> as.character() |> phyDat() -fit <- pml_bb(dat, "JC", rearrangement="NNI") -bs_trees_pml <- bootstrap.pml(fit, bs=10, rearrangement="NNI") +fit <- pml_bb(dat, "JC", rearrangement="NNI", control=pml.control(trace=0)) +bs_trees_pml <- bootstrap.pml(fit, bs=10, rearrangement="NNI", + control=pml.control(trace=0)) expect_inherits(bs_trees_pml, "multiPhylo") From 2250b13cb255a3afe136a64c4d3c8e331c9e2da8 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 18 Jul 2023 16:51:24 +0200 Subject: [PATCH 046/216] bugfix --- R/phyDat2.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/phyDat2.R b/R/phyDat2.R index 4d267e48..615bacca 100644 --- a/R/phyDat2.R +++ b/R/phyDat2.R @@ -176,7 +176,7 @@ phyDat.AA <- function (data, return.index = TRUE){ } if(inherits(data, "character")){ data <- as.matrix(data) - data <- toupper(data) +# data <- toupper(data) } aa <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V") @@ -217,6 +217,7 @@ phyDat.AA <- function (data, return.index = TRUE){ # ind_na[is.na(res[[i]])] <- TRUE } } else { + data <- toupper(data) for(i in seq_len(n)){ res[[i]] <- match(data[i, ind], aa2) ind_na[is.na(res[[i]])] <- TRUE From 8bf0d2f75da612a2b7261c2bd40d15183324fcbd Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 18 Jul 2023 16:51:42 +0200 Subject: [PATCH 047/216] update --- man/add_edge_length.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/add_edge_length.Rd b/man/add_edge_length.Rd index e79911e2..734c8874 100644 --- a/man/add_edge_length.Rd +++ b/man/add_edge_length.Rd @@ -5,7 +5,7 @@ \title{Assign and compute edge lengths from a sample of trees} \usage{ add_edge_length(tree, trees, fun = function(x) median(na.omit(x)), - rooted = all(is.rooted(c(tree, trees)))) + rooted = all(is.rooted(trees))) } \arguments{ \item{tree}{tree where edge lengths are assigned to.} From 3a62282ac4175bfd75ccf0bc46e0b53d7c1885dc Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 24 Jul 2023 11:07:16 +0200 Subject: [PATCH 048/216] change URL --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1082b754..a0accbb8 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![CRAN Status Badge](https://www.r-pkg.org/badges/version/phangorn)](https://cran.r-project.org/package=phangorn) [![CRAN Downloads (monthly)](https://cranlogs.r-pkg.org/badges/phangorn)](https://cran.r-project.org/package=phangorn) [![CRAN Downloads (total)](https://cranlogs.r-pkg.org/badges/grand-total/phangorn)](https://cran.r-project.org/package=phangorn) -[![codecov.io](https://codecov.io/github/KlausVigo/phangorn/coverage.svg?branch=master)](https://codecov.io/github/KlausVigo/phangorn?branch=master) +[![codecov.io](https://codecov.io/github/KlausVigo/phangorn/coverage.svg?branch=master)](https://app.codecov.io/github/KlausVigo/phangorn?branch=master) # phangorn From 09db6f89938791fbdd3efffaf3d78ba44d1dda5e Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 24 Jul 2023 11:58:45 +0200 Subject: [PATCH 049/216] allow plotAnc to use proper phyDat arguments --- R/ancestral_pml.R | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R index ae45d383..f0656a0d 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral_pml.R @@ -156,6 +156,7 @@ ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { } attributes(result) <- x attr(result, "call") <- call + if(return=="prob") class(result) <- c("ancestral", "phyDat") result } @@ -265,9 +266,10 @@ mpr <- function(tree, data, cost = NULL, return = "prob", ...) { if (return == "prob") { # for(i in 1:ntips) res[[i]] <- contrast[data[[i]],,drop=FALSE] if (return == "prob") res <- lapply(res, fun) + attributes(res) <- att + class(res) <- c("ancestral", "phyDat") } # else res[1:ntips] <- data[1:ntips] - attributes(res) <- att fun2 <- function(x) { x <- p2dna(x) fitchCoding2ambiguous(x) @@ -297,10 +299,12 @@ plotAnc <- function(tree, data, i = 1, site.pattern = TRUE, col = NULL, xrad <- CEX * diff(par("usr")[1:2]) / 50 levels <- attr(data, "levels") nc <- attr(data, "nc") - y <- matrix(unlist(y[]), ncol = nc, byrow = TRUE) - l <- dim(y)[1] - dat <- matrix(0, l, nc) - for (i in 1:l) dat[i, ] <- y[[i]] + if(inherits(data, "ancestral")){ + y <- matrix(unlist(y[]), ncol = nc, byrow = TRUE) + } else y <- attr(data, "contrast")[unlist(y),] +# l <- dim(y)[1] +# dat <- matrix(0, l, nc) +# for (i in 1:l) dat[i, ] <- y[[i]] plot(tree, label.offset = 1.1 * xrad, plot = FALSE, ...) lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) XX <- lastPP$xx @@ -384,16 +388,8 @@ ptree <- function(tree, data, return = "prob", acctran=TRUE, ...) { res <- vector("list", m) att$names <- makeAncNodeLabel(tree, ...) # att$names <- c(att$names, as.character((nTip+1):m)) - if(return == "phyDat"){ - if(type=="DNA"){ - indx <- c(1, 2, 6, 3, 7, 9, 12, 4, 8, 10, 13, 11, 14, 15, 16) - res[1:nTip] <- data[1:nTip] - for(i in (nTip+1):m) - res[[i]] <- indx[f$getAncAmb(i)[1:nr]] - } - else stop("This is only for nucleotide sequences supported so far") - } - else { + + if(return == "prob") { fun <- function(X) { rs <- rowSums(X) X / rs @@ -402,8 +398,20 @@ ptree <- function(tree, data, return = "prob", acctran=TRUE, ...) { for(i in seq_len(nTip)) res[[i]] <- contrast[data[[i]], , drop=FALSE] for(i in (nTip+1):m) res[[i]] <- f$getAnc(i)[1:nr, , drop=FALSE] res <- lapply(res, fun) + attributes(res) <- att + class(res) <- c("ancestral", "phyDat") + } + else { + if(type=="DNA"){ + indx <- c(1, 2, 6, 3, 7, 9, 12, 4, 8, 10, 13, 11, 14, 15, 16) + res[1:nTip] <- data[1:nTip] + for(i in (nTip+1):m) + res[[i]] <- indx[f$getAncAmb(i)[1:nr]] + attributes(res) <- att + } + else stop("This is only for nucleotide sequences supported so far") } - attributes(res) <- att +# attributes(res) <- att res } From df2bb822678f05975bf539f2d02359e5f9c70f14 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 1 Aug 2023 12:16:12 +0200 Subject: [PATCH 050/216] handle ancestral states better --- R/ancestral_pml.R | 75 ++++++++++++++++++++++++++++++++++++-------- R/read.phyDat.R | 1 + man/ancestral.pml.Rd | 20 ++++++++++-- 3 files changed, 80 insertions(+), 16 deletions(-) diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R index f0656a0d..d7588e27 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral_pml.R @@ -28,9 +28,14 @@ #' @param pos a character string defining the position of the legend #' @param cost A cost matrix for the transitions between two states. #' @param return return a \code{phyDat} object or matrix of probabilities. +#' @param x an object of class ancestral. #' @param \dots Further arguments passed to or from other methods. -#' @return %A matrix containing the the estimates character states. An object -#' of class "phyDat", containing the ancestral states of all nodes. +#' @return An object of class ancestral containing the the estimates character +#' states. +#' For \code{return="phyDat"} an object of class "phyDat", containing +#' the ancestral states of all nodes. For nucleotide data this can contain +#' ambiguous states. Apart from fitch parsimony the most likely states are +#' returned. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, #' \code{\link[ape]{root}}, \code{\link[ape]{makeNodeLabel}} @@ -47,6 +52,8 @@ #' @examples #' #' example(NJ) +#' # generate node labels to ensure plotting will work +#' tree <- makeNodeLabel(tree) #' fit <- pml(tree, Laurasiatherian) #' anc.ml <- ancestral.pml(fit, type = "ml") #' anc.p <- ancestral.pars(tree, Laurasiatherian) @@ -166,20 +173,55 @@ ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { # } -# in mpr +#' @rdname ancestral.pml +#' @export ancestral2phyDat <- function(x) { - eps <- 1.0e-5 - contr <- attr(x, "contrast") - # a bit too complicated - ind1 <- which(apply(contr, 1, function(x) sum(x > eps)) == 1L) - ind2 <- which(contr[ind1, ] > eps, arr.ind = TRUE) - # pos <- ind2[match(as.integer(1L:ncol(contr)), ind2[,2]),1] - pos <- ind2[match(seq_len(ncol(contr)), ind2[, 2]), 1] - # only first hit - res <- lapply(x, function(x, pos) pos[max.col(x)], pos) + type <- attr(x, "type") + # else res[1:ntips] <- data[1:ntips] + fun2 <- function(x) { + x <- p2dna(x) + fitchCoding2ambiguous(x) + } + if (type == "DNA") { + res <- lapply(x, fun2) + } + else { + eps <- 1.0e-5 + contr <- attr(x, "contrast") + # a bit too complicated + ind1 <- which(apply(contr, 1, function(x) sum(x > eps)) == 1L) + ind2 <- which(contr[ind1, ] > eps, arr.ind = TRUE) + # pos <- ind2[match(as.integer(1L:ncol(contr)), ind2[,2]),1] + pos <- ind2[match(seq_len(ncol(contr)), ind2[, 2]), 1] + # only first hit + res <- lapply(x, function(x, pos) pos[max.col(x)], pos) + + } attributes(res) <- attributes(x) + class(res) <- "phyDat" + return(res) +} + + +#' @rdname ancestral.pml +#' @export +ancestral2df <- function(x) { + stopifnot(inherits(x, "ancestral")) + lab <- names(x) + states <- attr(x, "levels") + nr <- attr(x, "nr") + nc <- attr(x, "nc") + pos <- seq_len(nr) + X <- unlist(x) |> array(c(nr, nc, length(x)), + dimnames = list(Site=pos, attr(x, "levels"), Node=names(x))) + + z1 <- apply(X, 2L, c) + ## z1 <- matrix(x, ncol = 2L, dimnames = list(NULL, dimnames(x)[[3]])) + z2 <- expand.grid(dimnames(X)[c(1,3)]) + res <- data.frame(z2, z1) return(res) } +# TODO sort by site fitchCoding2ambiguous <- function(x, type = "DNA") { @@ -292,9 +334,16 @@ mpr <- function(tree, data, cost = NULL, return = "prob", ...) { #' @param site.pattern logical, plot i-th site pattern or i-th site #' @importFrom grDevices hcl.colors #' @export -plotAnc <- function(tree, data, i = 1, site.pattern = TRUE, col = NULL, +plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, cex.pie = par("cex"), pos = "bottomright", ...) { + stopifnot(inherits(data, "phyDat")) y <- subset(data, select = i, site.pattern = site.pattern) + if(is.null(tree$node.label) || any(is.na(match(tree$node.label, names(y)))) || + is.numeric(tree$node.label)) + tree <- makeNodeLabel(tree) + if(any(is.na(match(c(tree$tip.label, tree$node.label), names(y))))) + stop("Tree needs nodelabel, which match the labels of the alignment!") + y <- y[c(tree$tip.label, tree$node.label),] CEX <- cex.pie xrad <- CEX * diff(par("usr")[1:2]) / 50 levels <- attr(data, "levels") diff --git a/R/read.phyDat.R b/R/read.phyDat.R index 6f2d4881..586515f7 100644 --- a/R/read.phyDat.R +++ b/R/read.phyDat.R @@ -114,6 +114,7 @@ read.phyDat <- function(file, format="phylip", type="DNA", ...){ #' @export write.phyDat <- function(x, file, format="phylip", colsep = "", nbcol=-1, ...){ formats <- c("phylip", "nexus", "interleaved", "sequential", "fasta") + if(inherits(x, "ancestral")) x <- ancestral2phyDat(x) format <- match.arg(tolower(format), formats) if(format=="nexus"){ type <- attr(x, "type") diff --git a/man/ancestral.pml.Rd b/man/ancestral.pml.Rd index 96cc3edc..ed1e3acd 100644 --- a/man/ancestral.pml.Rd +++ b/man/ancestral.pml.Rd @@ -2,6 +2,8 @@ % Please edit documentation in R/ancestral_pml.R \name{ancestral.pml} \alias{ancestral.pml} +\alias{ancestral2phyDat} +\alias{ancestral2df} \alias{ancestral.pars} \alias{pace} \alias{plotAnc} @@ -9,13 +11,17 @@ \usage{ ancestral.pml(object, type = "marginal", return = "prob", ...) +ancestral2phyDat(x) + +ancestral2df(x) + ancestral.pars(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, return = "prob", ...) pace(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, return = "prob", ...) -plotAnc(tree, data, i = 1, site.pattern = TRUE, col = NULL, +plotAnc(tree, data, i = 1, site.pattern = FALSE, col = NULL, cex.pie = par("cex"), pos = "bottomright", ...) } \arguments{ @@ -27,6 +33,8 @@ plotAnc(tree, data, i = 1, site.pattern = TRUE, col = NULL, \item{\dots}{Further arguments passed to or from other methods.} +\item{x}{an object of class ancestral.} + \item{tree}{a tree, i.e. an object of class pml} \item{data}{an object of class phyDat} @@ -44,8 +52,12 @@ plotAnc(tree, data, i = 1, site.pattern = TRUE, col = NULL, \item{pos}{a character string defining the position of the legend} } \value{ -%A matrix containing the the estimates character states. An object -of class "phyDat", containing the ancestral states of all nodes. +An object of class ancestral containing the the estimates character +states. +For \code{return="phyDat"} an object of class "phyDat", containing +the ancestral states of all nodes. For nucleotide data this can contain +ambiguous states. Apart from fitch parsimony the most likely states are +returned. } \description{ Marginal reconstruction of the ancestral character states. @@ -66,6 +78,8 @@ For further details see vignette("Ancestral"). \examples{ example(NJ) +# generate node labels to ensure plotting will work +tree <- makeNodeLabel(tree) fit <- pml(tree, Laurasiatherian) anc.ml <- ancestral.pml(fit, type = "ml") anc.p <- ancestral.pars(tree, Laurasiatherian) From 229d9a1a3bae1a7a8418859447bf2efb47643444 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 1 Aug 2023 12:17:41 +0200 Subject: [PATCH 051/216] distinguish unrooted plots --- R/plot_pml.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/plot_pml.R b/R/plot_pml.R index b6265966..6e91ef7b 100644 --- a/R/plot_pml.R +++ b/R/plot_pml.R @@ -53,7 +53,8 @@ plot.pml <- function(x, type="phylogram", direction = "rightwards", ...){ root_time <- max(x$tip.dates) - max(node.depth.edgelength(x$tree)) axisPhylo(side, root.time = root_time, backward = FALSE) } - else axisPhylo(side) + else if(x$method=="ultrametric") axisPhylo(side) + else add.scale.bar() } else add.scale.bar() if(!is.null(x$bs)) add_support(tree, x$bs) From 947252ebbcc7cd1a6e1d5acefa7b9cb5af040f6b Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 4 Aug 2023 17:34:11 +0200 Subject: [PATCH 052/216] reorganize code --- R/ancestral_pml.R | 74 +++++---------------------------- R/plotAnc.R | 99 ++++++++++++++++++++++++++++++++++++++++++++ man/ancestral.pml.Rd | 27 ++++-------- man/plotAnc.Rd | 62 +++++++++++++++++++++++++++ 4 files changed, 178 insertions(+), 84 deletions(-) create mode 100644 R/plotAnc.R create mode 100644 man/plotAnc.Rd diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R index d7588e27..b8f34f9d 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral_pml.R @@ -1,7 +1,3 @@ -# -# ancestral sequences ML -# - #' Ancestral character reconstruction. #' #' Marginal reconstruction of the ancestral character states. @@ -22,15 +18,11 @@ #' @param tree a tree, i.e. an object of class pml #' @param data an object of class phyDat #' @param type method used to assign characters to internal nodes, see details. -#' @param i plots the i-th site pattern of the \code{data}. -#' @param col a vector containing the colors for all possible states. -#' @param cex.pie a numeric defining the size of the pie graphs -#' @param pos a character string defining the position of the legend #' @param cost A cost matrix for the transitions between two states. #' @param return return a \code{phyDat} object or matrix of probabilities. #' @param x an object of class ancestral. #' @param \dots Further arguments passed to or from other methods. -#' @return An object of class ancestral containing the the estimates character +#' @return An object of class ancestral containing theestimated character #' states. #' For \code{return="phyDat"} an object of class "phyDat", containing #' the ancestral states of all nodes. For nucleotide data this can contain @@ -38,7 +30,8 @@ #' returned. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, -#' \code{\link[ape]{root}}, \code{\link[ape]{makeNodeLabel}} +#' \code{\link{plotAnc}}, \code{\link[ape]{root}}, +#' \code{\link[ape]{makeNodeLabel}} #' @references Felsenstein, J. (2004). \emph{Inferring Phylogenies}. Sinauer #' Associates, Sunderland. #' @@ -175,7 +168,7 @@ ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { #' @rdname ancestral.pml #' @export -ancestral2phyDat <- function(x) { +as.phyDat.ancestral <- function(x, ...) { type <- attr(x, "type") # else res[1:ntips] <- data[1:ntips] fun2 <- function(x) { @@ -205,7 +198,7 @@ ancestral2phyDat <- function(x) { #' @rdname ancestral.pml #' @export -ancestral2df <- function(x) { +as.data.frame.ancestral <- function(x) { stopifnot(inherits(x, "ancestral")) lab <- names(x) states <- attr(x, "levels") @@ -213,15 +206,13 @@ ancestral2df <- function(x) { nc <- attr(x, "nc") pos <- seq_len(nr) X <- unlist(x) |> array(c(nr, nc, length(x)), - dimnames = list(Site=pos, attr(x, "levels"), Node=names(x))) - + dimnames = list(Site=pos, attr(x, "levels"), Node=names(x))) z1 <- apply(X, 2L, c) - ## z1 <- matrix(x, ncol = 2L, dimnames = list(NULL, dimnames(x)[[3]])) z2 <- expand.grid(dimnames(X)[c(1,3)]) res <- data.frame(z2, z1) - return(res) + res <- data.frame(z2, z1) + res[order(res[,1], res[,2]), ] } -# TODO sort by site fitchCoding2ambiguous <- function(x, type = "DNA") { @@ -322,7 +313,7 @@ mpr <- function(tree, data, cost = NULL, return = "prob", ...) { attributes(res) <- att } else { - res <- ancestral2phyDat(res) + res <- as.phyDat.ancestral(res) } res[1:ntips] <- data } @@ -330,54 +321,9 @@ mpr <- function(tree, data, cost = NULL, return = "prob", ...) { } -#' @rdname ancestral.pml -#' @param site.pattern logical, plot i-th site pattern or i-th site -#' @importFrom grDevices hcl.colors -#' @export -plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, - cex.pie = par("cex"), pos = "bottomright", ...) { - stopifnot(inherits(data, "phyDat")) - y <- subset(data, select = i, site.pattern = site.pattern) - if(is.null(tree$node.label) || any(is.na(match(tree$node.label, names(y)))) || - is.numeric(tree$node.label)) - tree <- makeNodeLabel(tree) - if(any(is.na(match(c(tree$tip.label, tree$node.label), names(y))))) - stop("Tree needs nodelabel, which match the labels of the alignment!") - y <- y[c(tree$tip.label, tree$node.label),] - CEX <- cex.pie - xrad <- CEX * diff(par("usr")[1:2]) / 50 - levels <- attr(data, "levels") - nc <- attr(data, "nc") - if(inherits(data, "ancestral")){ - y <- matrix(unlist(y[]), ncol = nc, byrow = TRUE) - } else y <- attr(data, "contrast")[unlist(y),] -# l <- dim(y)[1] -# dat <- matrix(0, l, nc) -# for (i in 1:l) dat[i, ] <- y[[i]] - plot(tree, label.offset = 1.1 * xrad, plot = FALSE, ...) - lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) - XX <- lastPP$xx - YY <- lastPP$yy - xrad <- CEX * diff(lastPP$x.lim * 1.1) / 50 - par(new = TRUE) - plot(tree, label.offset = 1.1 * xrad, plot = TRUE, ...) - if (is.null(col)) col <- hcl.colors(nc) #rainbow(nc) - if (length(col) != nc) { - warning("Length of color vector differs from number of levels!") - } - BOTHlabels( - pie = y, XX = XX, YY = YY, adj = c(0.5, 0.5), frame = "rect", pch = NULL, - sel = seq_along(XX), thermo = NULL, piecol = col, col = "black", - bg = "lightblue", horiz = FALSE, width = NULL, height = NULL, cex = cex.pie - ) - if (!is.null(pos)) legend(pos, legend=levels, text.col = col) -} - # # ACCTRAN # - - acctran2 <- function(tree, data) { if(!is.binary(tree)) tree <- multi2di(tree) tree <- reorder(tree, "postorder") @@ -469,7 +415,7 @@ makeAncNodeLabel <- function(tree, ...){ if(!is.null(tree$node.label)){ node_label <- tree$node.label if(length(unique(node_label)) == Nnode(tree)) return(c(tree$tip.label, node_label)) - else message("Node labels are not unique, used makeNodeLabels(tree, ...) to create them!") + else message("Node labels are not unique, used makeNodeLabel(tree, ...) to create them!") } tree <- makeNodeLabel(tree, ...) c(tree$tip.label, tree$node.label) diff --git a/R/plotAnc.R b/R/plotAnc.R new file mode 100644 index 00000000..0b773710 --- /dev/null +++ b/R/plotAnc.R @@ -0,0 +1,99 @@ +getTransition <- function(scheme, levels){ + l <- length(scheme$properties) + P <- matrix(0, length(levels), l, + dimnames = list(levels, names(scheme$properties))) + for(i in seq_along(scheme$properties)){ + ind <- match(scheme$properties[[i]], levels) + P[ind,i] <- 1 + } + P +} + + +#' Plot ancestral character on a tree +#' +#' \code{plotAnc} plots a phylogeny and adds character to the nodes. Either +#' takes output from \code{ancestral.pars} or \code{ancestral.pml} or from an +#' alignment where there are node labels in the tree match the constructed +#' sequences in the alignment. +#' +#' For further details see vignette("Ancestral"). +#' +#' @param tree a tree, i.e. an object of class pml +#' @param data an object of class \code{phyDat} or \code{ancestral}. +#' @param site.pattern logical, plot i-th site pattern or i-th site +#' @param i plots the i-th site of the \code{data}. +#' @param col a vector containing the colors for all possible states. +#' @param cex.pie a numeric defining the size of the pie graphs. +#' @param pos a character string defining the position of the legend. +#' @param scheme color scheme, for amino acids this can be "Ape_AA", +#' "Clustal_AA", "Hydrophobicity_AA" and "Zappo_AA" are available. +#' @param \dots Further arguments passed to or from other methods. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}} +#' @keywords plot +#' @examples +#' +#' example(NJ) +#' # generate node labels to ensure plotting will work +#' tree <- makeNodeLabel(tree) +#' anc.p <- ancestral.pars(tree, Laurasiatherian) +#' # plot the third character +#' plotAnc(tree, anc.ml, 3) +#' +#' data(chloroplast) +#' tree <- pratchet(chloroplast, maxit=10, trace=0) +#' tree <- makeNodeLabel(tree) +#' anc.ch <- ancestral.pars(tree, chloroplast) +#' image(chloroplast[, 1:25]) +#' plotAnc(tree, anc.ch, 21, scheme="Ape_AA") +#' plotAnc(tree, anc.ch, 21, scheme="Clustal_AA") +#' @importFrom grDevices hcl.colors +#' @export +plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, + cex.pie = .5, pos = "bottomright", scheme=NULL, + ...) { + stopifnot(inherits(data, "phyDat")) + y <- subset(data, select = i, site.pattern = site.pattern) + if(is.null(tree$node.label) || any(is.na(match(tree$node.label, names(y)))) || + is.numeric(tree$node.label)) + tree <- makeNodeLabel(tree) + if(any(is.na(match(c(tree$tip.label, tree$node.label), names(y))))) + stop("Tree needs nodelabel, which match the labels of the alignment!") + y <- y[c(tree$tip.label, tree$node.label),] + CEX <- cex.pie + xrad <- CEX * diff(par("usr")[1:2]) / 50 + levels <- attr(data, "levels") + nc <- attr(data, "nc") + if(inherits(data, "ancestral")){ + y <- matrix(unlist(y[]), ncol = nc, byrow = TRUE) + } else y <- attr(data, "contrast")[unlist(y),] + if(!is.null(scheme)){ + sc <- get(scheme, environment(pml)) + P <- getTransition(sc, levels) + y <- y %*% P + levels <- colnames(P) + col <- sc$col + nc <- ncol(y) + } + # l <- dim(y)[1] + # dat <- matrix(0, l, nc) + # for (i in 1:l) dat[i, ] <- y[[i]] + plot(tree, label.offset = 1.1 * xrad, plot = FALSE, ...) + lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) + XX <- lastPP$xx + YY <- lastPP$yy + xrad <- CEX * diff(lastPP$x.lim * 1.1) / 50 + par(new = TRUE) + plot(tree, label.offset = 1.1 * xrad, plot = TRUE, ...) + if (is.null(col)) col <- hcl.colors(nc) #rainbow(nc) + if (length(col) != nc) { + warning("Length of color vector differs from number of levels!") + } + BOTHlabels( + pie = y, XX = XX, YY = YY, adj = c(0.5, 0.5), frame = "rect", pch = NULL, + sel = seq_along(XX), thermo = NULL, piecol = col, col = "black", + bg = "lightblue", horiz = FALSE, width = NULL, height = NULL, cex = cex.pie + ) + if (!is.null(pos)) legend(pos, legend=levels, pch=19, col = col) +} diff --git a/man/ancestral.pml.Rd b/man/ancestral.pml.Rd index ed1e3acd..0ea809af 100644 --- a/man/ancestral.pml.Rd +++ b/man/ancestral.pml.Rd @@ -2,27 +2,23 @@ % Please edit documentation in R/ancestral_pml.R \name{ancestral.pml} \alias{ancestral.pml} -\alias{ancestral2phyDat} -\alias{ancestral2df} +\alias{as.phyDat.ancestral} +\alias{as.data.frame.ancestral} \alias{ancestral.pars} \alias{pace} -\alias{plotAnc} \title{Ancestral character reconstruction.} \usage{ ancestral.pml(object, type = "marginal", return = "prob", ...) -ancestral2phyDat(x) +\method{as.phyDat}{ancestral}(x, ...) -ancestral2df(x) +\method{as.data.frame}{ancestral}(x) ancestral.pars(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, return = "prob", ...) pace(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, return = "prob", ...) - -plotAnc(tree, data, i = 1, site.pattern = FALSE, col = NULL, - cex.pie = par("cex"), pos = "bottomright", ...) } \arguments{ \item{object}{an object of class pml} @@ -40,19 +36,9 @@ plotAnc(tree, data, i = 1, site.pattern = FALSE, col = NULL, \item{data}{an object of class phyDat} \item{cost}{A cost matrix for the transitions between two states.} - -\item{i}{plots the i-th site pattern of the \code{data}.} - -\item{site.pattern}{logical, plot i-th site pattern or i-th site} - -\item{col}{a vector containing the colors for all possible states.} - -\item{cex.pie}{a numeric defining the size of the pie graphs} - -\item{pos}{a character string defining the position of the legend} } \value{ -An object of class ancestral containing the the estimates character +An object of class ancestral containing theestimated character states. For \code{return="phyDat"} an object of class "phyDat", containing the ancestral states of all nodes. For nucleotide data this can contain @@ -106,7 +92,8 @@ Press, Oxford. } \seealso{ \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, -\code{\link[ape]{root}}, \code{\link[ape]{makeNodeLabel}} +\code{\link{plotAnc}}, \code{\link[ape]{root}}, +\code{\link[ape]{makeNodeLabel}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/plotAnc.Rd b/man/plotAnc.Rd new file mode 100644 index 00000000..976b9139 --- /dev/null +++ b/man/plotAnc.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotAnc.R +\name{plotAnc} +\alias{plotAnc} +\title{Plot ancestral character on a tree} +\usage{ +plotAnc(tree, data, i = 1, site.pattern = FALSE, col = NULL, + cex.pie = 0.5, pos = "bottomright", scheme = NULL, ...) +} +\arguments{ +\item{tree}{a tree, i.e. an object of class pml} + +\item{data}{an object of class \code{phyDat} or \code{ancestral}.} + +\item{i}{plots the i-th site of the \code{data}.} + +\item{site.pattern}{logical, plot i-th site pattern or i-th site} + +\item{col}{a vector containing the colors for all possible states.} + +\item{cex.pie}{a numeric defining the size of the pie graphs.} + +\item{pos}{a character string defining the position of the legend.} + +\item{scheme}{color scheme, for amino acids this can be "Ape_AA", +"Clustal_AA", "Hydrophobicity_AA" and "Zappo_AA" are available.} + +\item{\dots}{Further arguments passed to or from other methods.} +} +\description{ +\code{plotAnc} plots a phylogeny and adds character to the nodes. Either +takes output from \code{ancestral.pars} or \code{ancestral.pml} or from an +alignment where there are node labels in the tree match the constructed +sequences in the alignment. +} +\details{ +For further details see vignette("Ancestral"). +} +\examples{ + +example(NJ) +# generate node labels to ensure plotting will work +tree <- makeNodeLabel(tree) +anc.p <- ancestral.pars(tree, Laurasiatherian) +# plot the third character +plotAnc(tree, anc.ml, 3) + +data(chloroplast) +tree <- pratchet(chloroplast, maxit=10, trace=0) +tree <- makeNodeLabel(tree) +anc.ch <- ancestral.pars(tree, chloroplast) +image(chloroplast[, 1:25]) +plotAnc(tree, anc.ch, 21, scheme="Ape_AA") +plotAnc(tree, anc.ch, 21, scheme="Clustal_AA") +} +\seealso{ +\code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}} +} +\author{ +Klaus Schliep \email{klaus.schliep@gmail.com} +} +\keyword{plot} From ec581fc5a7feb6bbe99345ee85c26558c1cc3076 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 4 Aug 2023 17:34:52 +0200 Subject: [PATCH 053/216] add colorschemes --- R/zzz.R | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/R/zzz.R b/R/zzz.R index a37d8e3b..1f51d58f 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -42,6 +42,82 @@ .nucleotideAlphabet <- c("a", "c", "g", "t") +Ape_NT <- list(properties = list( + c("a", "g", "c", "t", "n", "-")), + color=c("red", "yellow", "green", "blue", "grey", "black")) + + +RY_NT <- list(properties = list( + Purine = c("a", "g", "r"), + Pyrimidine = c("c", "t", "y"), + "n" = "n", + "-" = "-"), + color=c("#FF00FF", "#00FFFF", "grey", "black")) + + + +Ape_AA <- list(properties = list( + Hydrophobic = c("V", "I", "L", "F", "W", "Y", "M"), + Small = c("P", "G", "A", "C"), + Hydrophilic = c("S", "T", "H", "N", "Q", "D", "E", "K", "R")), + color=c("red", "yellow", "blue")) + +# Properties + Conservation (Clustal X) +Clustal_AA <- list(properties = list( + Hydrophobic = c("A", "I", "L", "M", "F", "W", "V"), + Positive = c("K", "R"), + Negative = c("E", "D"), + Polar = c("N", "Q", "S", "T"), + Glycines = "G", + Prolines = "P", + Aromatic = c("H", "Y"), + Cysteine = "C"), + color= c("#80a0f0", "#f01505", "#c048c0", "#15c015", "#f09048", "#c0c000", + "#15a4a4", "#f08080") +) + + +# Physicochemical Properties +Zappo_AA <- list(properties = list( + "Aliphatic/Hydrophobic" = c("I", "L", "V", "A", "M"), + Aromatic = c("F", "W", "Y"), + Positive = c("K", "R", "H"), + Negative = c("E", "D"), + Hydrophilic = c("S", "T", "N", "Q"), + "Conformationally special" = c("P", "G"), + Cysteine = "C"), + color= c("#ff7979", "#f89f56", "#0070c0", "#c00000", "#08c81a", "#cc00cc", + "#ffff00") +) + + +Hydrophobicity_AA <- list(properties = list( + Lys = "K", + Asp = "D", + Glu = "E", + Arg = "R", + Gln = "Q", + Asn = "N", + Pro = "P", + His = "H", + Ser = "S", + Thr = "T", + Cys = "C", + Gly = "G", + Ala = "A", + Tyr = "Y", + Met = "M", + Val = "V", + Trp = "W", + Leu = "L", + Ile = "I", + Phe = "F" +), color=c("#0000FF", "#0D00F1", "#1A00E4", "#2800D6", "#3500C9", "#4300BB", + "#5000AE", "#5D00A1", "#6B0093", "#780086", "#860078", "#93006B", + "#A1005D", "#AE0050", "#BB0043", "#C90035", "#D60028", "#E4001A", + "#F1000D", "#FF0000")) + + # if rate g[i] is smaller than .gEps invariant site is increased by w[i] @@ -55,3 +131,4 @@ loadModule("Fitch_mod", TRUE) # .onLoad <- function(libname, pkgname) { # library.dynam("phangorn", pkgname, libname) #} + From 5db247451564413a7afced0cf601655df2e28b0a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 4 Aug 2023 17:35:11 +0200 Subject: [PATCH 054/216] bugfix --- R/read.phyDat.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/read.phyDat.R b/R/read.phyDat.R index 586515f7..8865739e 100644 --- a/R/read.phyDat.R +++ b/R/read.phyDat.R @@ -114,7 +114,7 @@ read.phyDat <- function(file, format="phylip", type="DNA", ...){ #' @export write.phyDat <- function(x, file, format="phylip", colsep = "", nbcol=-1, ...){ formats <- c("phylip", "nexus", "interleaved", "sequential", "fasta") - if(inherits(x, "ancestral")) x <- ancestral2phyDat(x) + if(inherits(x, "ancestral")) x <- as.phyDat(x) format <- match.arg(tolower(format), formats) if(format=="nexus"){ type <- attr(x, "type") From d1a46cc029e360097de86dd80c6c8d6f1b19ccf4 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 4 Aug 2023 17:39:41 +0200 Subject: [PATCH 055/216] update --- NAMESPACE | 2 ++ NEWS | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 561ba49b..99b7b174 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ S3method(as.Matrix,splits) S3method(as.MultipleAlignment,phyDat) S3method(as.bitsplits,splits) S3method(as.character,phyDat) +S3method(as.data.frame,ancestral) S3method(as.data.frame,phyDat) S3method(as.matrix,splits) S3method(as.networx,phylo) @@ -22,6 +23,7 @@ S3method(as.networx,splits) S3method(as.phyDat,DNAbin) S3method(as.phyDat,MultipleAlignment) S3method(as.phyDat,alignment) +S3method(as.phyDat,ancestral) S3method(as.phyDat,character) S3method(as.phyDat,data.frame) S3method(as.phyDat,factor) diff --git a/NEWS b/NEWS index 88757d3e..57997825 100644 --- a/NEWS +++ b/NEWS @@ -26,7 +26,7 @@ OTHER CHANGES o functions Descendants, Ancestors, Siblings, mrca.phy now also accept a - character vector for the node argument + character vector for the node argument. CHANGES in PHANGORN VERSION 2.11.0 From f2724fe0a793b73e20876a6c01606a6686980056 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 4 Aug 2023 17:51:02 +0200 Subject: [PATCH 056/216] bugfix --- R/ancestral_pml.R | 2 +- man/ancestral.pml.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R index b8f34f9d..0fa325ad 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral_pml.R @@ -198,7 +198,7 @@ as.phyDat.ancestral <- function(x, ...) { #' @rdname ancestral.pml #' @export -as.data.frame.ancestral <- function(x) { +as.data.frame.ancestral <- function(x, ...) { stopifnot(inherits(x, "ancestral")) lab <- names(x) states <- attr(x, "levels") diff --git a/man/ancestral.pml.Rd b/man/ancestral.pml.Rd index 0ea809af..a0c8b80d 100644 --- a/man/ancestral.pml.Rd +++ b/man/ancestral.pml.Rd @@ -12,7 +12,7 @@ ancestral.pml(object, type = "marginal", return = "prob", ...) \method{as.phyDat}{ancestral}(x, ...) -\method{as.data.frame}{ancestral}(x) +\method{as.data.frame}{ancestral}(x, ...) ancestral.pars(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, return = "prob", ...) From 5db5fcb9034f682918b236d33fe683c85f59bb4f Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 4 Aug 2023 19:10:46 +0200 Subject: [PATCH 057/216] bugfix --- R/plotAnc.R | 2 +- man/plotAnc.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/plotAnc.R b/R/plotAnc.R index 0b773710..b51d30be 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -39,7 +39,7 @@ getTransition <- function(scheme, levels){ #' tree <- makeNodeLabel(tree) #' anc.p <- ancestral.pars(tree, Laurasiatherian) #' # plot the third character -#' plotAnc(tree, anc.ml, 3) +#' plotAnc(tree, anc.p, 3) #' #' data(chloroplast) #' tree <- pratchet(chloroplast, maxit=10, trace=0) diff --git a/man/plotAnc.Rd b/man/plotAnc.Rd index 976b9139..6b3aee59 100644 --- a/man/plotAnc.Rd +++ b/man/plotAnc.Rd @@ -43,7 +43,7 @@ example(NJ) tree <- makeNodeLabel(tree) anc.p <- ancestral.pars(tree, Laurasiatherian) # plot the third character -plotAnc(tree, anc.ml, 3) +plotAnc(tree, anc.p, 3) data(chloroplast) tree <- pratchet(chloroplast, maxit=10, trace=0) From 2a6aba5ad6cbf4e0fb6da04eebbf946e786ae4da Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 21 Aug 2023 14:49:43 +0200 Subject: [PATCH 058/216] bug fix --- R/zzz.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/zzz.R b/R/zzz.R index 1f51d58f..8d03c21b 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -43,7 +43,7 @@ .nucleotideAlphabet <- c("a", "c", "g", "t") Ape_NT <- list(properties = list( - c("a", "g", "c", "t", "n", "-")), + a="a", g="g", c="c", t="t"), #"n", "-")), color=c("red", "yellow", "green", "blue", "grey", "black")) From d050425a5456a471250c57ba439c8612dd28e5b9 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 21 Aug 2023 14:50:02 +0200 Subject: [PATCH 059/216] update --- NEWS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS b/NEWS index 57997825..36c693cc 100644 --- a/NEWS +++ b/NEWS @@ -28,6 +28,12 @@ OTHER CHANGES character vector for the node argument. + o plotAnc got an argument scheme allowing to use different color schemes. + + Some default values changed to produce nicer plots are nicer out of the + + box. + CHANGES in PHANGORN VERSION 2.11.0 From eb1f42934ce5027858cd36bd3b230f1664dc10a0 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 28 Aug 2023 13:01:02 +0200 Subject: [PATCH 060/216] clean up code --- R/distTree.R | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/R/distTree.R b/R/distTree.R index 9ec155eb..e82fdda5 100644 --- a/R/distTree.R +++ b/R/distTree.R @@ -182,25 +182,25 @@ UNJ <- function(x){ #' @export designTree <- function(tree, method = "unrooted", sparse = FALSE, tip.dates=NULL, ...) { - if (!is.na(pmatch(method, "all"))) - method <- "unrooted" - METHOD <- c("unrooted", "rooted", "tipdated") - method <- pmatch(method, METHOD) - if (is.na(method)) stop("invalid method") - if (method == -1) stop("ambiguous method") - if (!is.rooted(tree) & method == 2) stop("tree has to be rooted") - if (method == 1) { - X <- designUnrooted(tree, ...) - if (sparse) X <- Matrix(X) - } - if (method == 2) X <- designUltra(tree, sparse = sparse, ...) - if(method == 3) X <- designTipDated(tree, tip.dates=tip.dates, sparse=sparse, - ...) + method <- match.arg(method, + c("unrooted", "ultrametric", "rooted", "tipdated")) + if(method == "rooted") method <- "ultrametric" + #if (!is.na(pmatch(method, "all"))) + # method <- "unrooted" + #METHOD <- c("unrooted", "rooted", "tipdated") + #method <- pmatch(method, METHOD) + #if (is.na(method)) stop("invalid method") + #if (method == -1) stop("ambiguous method") + #if (!is.rooted(tree) & method == 2) stop("tree has to be rooted") + if(method == "unrooted") X <- designUnrooted(tree, sparse = sparse, ...) + if(method == "ultrametric") X <- designUltra(tree, sparse = sparse, ...) + if(method == "tipdated") X <- designTipDated(tree, tip.dates=tip.dates, + sparse=sparse, ...) X } -designUnrooted <- function(tree, order = NULL) { +designUnrooted <- function(tree, sparse=FALSE, order = NULL) { if (inherits(tree, "phylo")) { if (is.rooted(tree)) tree <- unroot(tree) tree <- reorder(tree, "postorder") @@ -225,6 +225,7 @@ designUnrooted <- function(tree, order = NULL) { } if (inherits(tree, "phylo")) colnames(res) <- paste(tree$edge[, 1], tree$edge[, 2], sep = "<->") + if(sparse) res <- Matrix(res, sparse=TRUE) res } @@ -342,6 +343,7 @@ designUnrooted2 <- function(tree, sparse = TRUE) { designTipDated <- function(tree, tip.dates, sparse = TRUE){ + stopifnot(is.numeric(tip.dates), length(tip.dates) >= Ntip(tree)) nTip <- Ntip(tree) tmp <- function(n){ x1 <- rep(seq_len(n), each=n) @@ -352,7 +354,7 @@ designTipDated <- function(tree, tip.dates, sparse = TRUE){ tip.dates <- tip.dates - max(tip.dates) x <- tmp(nTip) %*% tip.dates nodes <- integer(tree$Nnode) - X <- designUltra(tree) + X <- designUltra(tree, sparse=sparse) nodes <- attr(X, "nodes") X <- cbind(X, x) colnames(X) <- c(nodes, -1) From 14e039bfe7af0035b4035bd71adf33e442ba373d Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 29 Aug 2023 08:52:43 +0200 Subject: [PATCH 061/216] speed up candidate tree --- R/candidate_tree.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/candidate_tree.R b/R/candidate_tree.R index dfa838ec..374831f5 100644 --- a/R/candidate_tree.R +++ b/R/candidate_tree.R @@ -35,7 +35,7 @@ candidate_tree <- function(x, method=c("unrooted", "ultrametric", "tipdated"), enforce_ultrametric <- TRUE } if(method=="unrooted"){ - tree <- pratchet(x, maxit=10L, trace=0, perturbation = "ratchet") + tree <- pratchet(x, maxit=5L, trace=0, perturbation = "stochastic") tree <- multi2di(tree) tree <- unroot(tree) tree <- acctran(tree, x) From 8280af97ddc3f354716df121cdb64b57f1e708f6 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 29 Aug 2023 08:53:24 +0200 Subject: [PATCH 062/216] improve indexNNI to allow for different porpuses --- R/fitch64.R | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/R/fitch64.R b/R/fitch64.R index ffa8ee74..887b3447 100644 --- a/R/fitch64.R +++ b/R/fitch64.R @@ -152,7 +152,7 @@ fitch_spr <- function (tree, f, trace=0L) } -indexNNI_fitch <- function(tree, offset=2L*Ntip(tree)) { +indexNNI_fitch <- function(tree, offset=2L*Ntip(tree), rooted=is.rooted(tree)) { offset <- as.integer(offset) parent <- tree$edge[, 1] child <- tree$edge[, 2] @@ -169,6 +169,15 @@ indexNNI_fitch <- function(tree, offset=2L*Ntip(tree)) { # e-----f d is closest to root, f is root from subtree a,b,c # / \ # b c c(a,b,c,d,e,f) + + # d d is f + offset, if offset > 0 + # / + # f + # / \ + # e \ + # / \ c + # a b + k <- 1 for (i in ind) { f <- pvector[i] @@ -179,6 +188,12 @@ indexNNI_fitch <- function(tree, offset=2L*Ntip(tree)) { if (pvector[f]){ cd <- c(cd, f + offset) } + if(offset < 0L){ + tmp <- pvector[f] + if(tmp==0L) tmp <- f + # think about this more + cd[2] <- tmp + } # else if(rooted) cd <- c(cd, NA_integer_) # else if(!rooted) ef <- c(i, cd[2]) # else cd[2] <- f From 5a99179de58f0eeac8d3dae47ef9ba103e512545 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 6 Sep 2023 17:34:34 +0200 Subject: [PATCH 063/216] add several support values to a tree --- R/plotBS.R | 44 +++++++++++++++++++++++++++++++------------- man/plotBS.Rd | 7 +++++-- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/R/plotBS.R b/R/plotBS.R index cf2fe8ec..c3571960 100644 --- a/R/plotBS.R +++ b/R/plotBS.R @@ -1,22 +1,35 @@ -support <- function(tree, trees, method="FBP", tol=1e-8, scale=TRUE){ +support <- function(tree, trees, method="FBP", tol=2e-8, scale=TRUE){ trees <- keep.tip(trees, tree$tip.label) - method <- match.arg(method, c("FBP", "TBE", "MCC")) + method <- match.arg(method, c("FBP", "TBE", "MCC"), several.ok=TRUE) + multi <- ifelse(length(method)>1, TRUE, FALSE) tip2root <- ifelse(method=="MCC", TRUE, FALSE) if(all(sapply(trees, \(x)!is.null(x$edge.length)))){ trees <- di2multi(trees, tol=tol) # , tip2root=tip2root) } - if(method=="FBP" || method=="MCC"){ + if(multi) X <- matrix(NA, Nnode(tree), length(method), + dimnames = list(NULL, method)) + if("MCC" %in% method){ trees <- .uncompressTipLabel(trees) # check if needed - if(method=="MCC" && any(!is.rooted(trees))) + if(any(!is.rooted(trees))) stop("All trees need to be rooted for method 'MCC'!") - if (method=="FBP" && any(is.rooted(trees))) trees <- unroot(trees) x <- prop.clades(tree, trees) x <- (x / length(trees)) if(!scale) x <- x * 100 + if(multi) X[, "MCC"] <- x } - else { + if("FBP" %in% method){ + trees <- .uncompressTipLabel(trees) # check if needed + if (any(is.rooted(trees))) trees <- unroot(trees) + x <- prop.clades(tree, trees) + x <- (x / length(trees)) + if(!scale) x <- x * 100 + if(multi) X[, "FBP"] <- x + } + if("TBE" %in% method){ x <- transferBootstrap(tree, trees, FALSE, scale=scale) + if(multi) X[, "TBE"] <- x } + if(multi) return(X) x } @@ -49,6 +62,7 @@ support <- function(tree, trees, method="FBP", tol=1e-8, scale=TRUE){ #' @param digits integer indicating the number of decimal places. #' @param p only plot support values higher than this percentage number #' (default is 0). +#' @param sep seperator between the different methods. #' @param \dots further parameters used by \code{plot.phylo}. #' @param frame a character string specifying the kind of frame to be printed #' around the bootstrap values. This must be one of "none" (the default), @@ -91,15 +105,17 @@ support <- function(tree, trees, method="FBP", tol=1e-8, scale=TRUE){ #' @export plotBS <- function(tree, trees, type = "phylogram", method="FBP", bs.col = "black", bs.adj = NULL, digits=3, p = 0, - frame = "none", tol=1e-6, ...) { + frame = "none", tol=1e-6, sep = "/", ...) { type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", "radial", "none")) if(inherits(tree, "pml")) tree <- tree$tree if(!inherits(tree, "phylo")) stop("tree must be of class phylo!") - method <- match.arg(method, c("FBP", "TBE", "MCC")) - +# method <- match.arg(method, c("FBP", "TBE", "MCC"), several.ok=TRUE) +# wird in support gecheckt if (hasArg(trees)) { - x <-support(tree, trees, method="FBP", tol=tol) + x <-support(tree, trees, method=method, tol=tol) + x <- round(x, digits=digits) + if(length(method)>1) x <- apply(x, 1, paste, collapse=sep) tree$node.label <- x } else { @@ -146,12 +162,14 @@ plotBS <- function(tree, trees, type = "phylogram", method="FBP", #' @rdname plotBS #' @export -add_support <- function(tree, trees,method="FBP", tol=1e-8, - scale=TRUE, frame="none", ...){ +add_support <- function(tree, trees, method="FBP", tol=1e-8, + scale=TRUE, frame="none", digits=3, sep="/", ...){ lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) # if(!all.equal(lastPP$edge, reorder(tree)$edge)) # stop("tree seems to differ from the last plot!") - x <- support(tree, trees,method="FBP", tol=tol, scale=scale) + x <- support(tree, trees, method=method, tol=tol, scale=scale) + x <- round(x, digits=digits) + if(length(method)>1) x <- apply(x, 1, paste, collapse=sep) drawSupportOnEdges(x, frame=frame, ...) } diff --git a/man/plotBS.Rd b/man/plotBS.Rd index 2da59af9..04d50ea2 100644 --- a/man/plotBS.Rd +++ b/man/plotBS.Rd @@ -6,10 +6,11 @@ \title{Plotting trees with bootstrap values} \usage{ plotBS(tree, trees, type = "phylogram", method = "FBP", bs.col = "black", - bs.adj = NULL, digits = 3, p = 0, frame = "none", tol = 1e-06, ...) + bs.adj = NULL, digits = 3, p = 0, frame = "none", tol = 1e-06, + sep = "/", ...) add_support(tree, trees, method = "FBP", tol = 1e-08, scale = TRUE, - frame = "none", ...) + frame = "none", digits = 3, sep = "/", ...) } \arguments{ \item{tree}{The tree on which edges the bootstrap values are plotted.} @@ -41,6 +42,8 @@ around the bootstrap values. This must be one of "none" (the default), \item{tol}{a numeric value giving the tolerance to consider a branch length significantly greater than zero.} +\item{sep}{seperator between the different methods.} + \item{\dots}{further parameters used by \code{plot.phylo}.} \item{scale}{return ratio or percentage.} From c871d717f0fd7891e34cf84fe3d3db8574bb1aee Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 6 Sep 2023 18:05:49 +0200 Subject: [PATCH 064/216] small improvements to color schemes --- R/plotAnc.R | 9 ++++++--- R/zzz.R | 15 +++++++++------ man/plotAnc.Rd | 7 ++++--- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/R/plotAnc.R b/R/plotAnc.R index b51d30be..66ebad3c 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -26,8 +26,9 @@ getTransition <- function(scheme, levels){ #' @param col a vector containing the colors for all possible states. #' @param cex.pie a numeric defining the size of the pie graphs. #' @param pos a character string defining the position of the legend. -#' @param scheme color scheme, for amino acids this can be "Ape_AA", -#' "Clustal_AA", "Hydrophobicity_AA" and "Zappo_AA" are available. +#' @param scheme a predefined color scheme. For amino acid options are "Ape_AA", +#' "Zappo_AA", "Clustal", "Polarity" and "Transmembrane_tendency", for +#' nucleotides "Ape_NT" and"RY_NT". Names can be abbreviated. #' @param \dots Further arguments passed to or from other methods. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}} @@ -47,7 +48,7 @@ getTransition <- function(scheme, levels){ #' anc.ch <- ancestral.pars(tree, chloroplast) #' image(chloroplast[, 1:25]) #' plotAnc(tree, anc.ch, 21, scheme="Ape_AA") -#' plotAnc(tree, anc.ch, 21, scheme="Clustal_AA") +#' plotAnc(tree, anc.ch, 21, scheme="Clustal") #' @importFrom grDevices hcl.colors #' @export plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, @@ -69,6 +70,8 @@ plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, y <- matrix(unlist(y[]), ncol = nc, byrow = TRUE) } else y <- attr(data, "contrast")[unlist(y),] if(!is.null(scheme)){ + scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", + "Transmembrane_tendency", "Ape_NT", "RY_NT")) sc <- get(scheme, environment(pml)) P <- getTransition(sc, levels) y <- y %*% P diff --git a/R/zzz.R b/R/zzz.R index 8d03c21b..0db88383 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -54,8 +54,6 @@ RY_NT <- list(properties = list( "-" = "-"), color=c("#FF00FF", "#00FFFF", "grey", "black")) - - Ape_AA <- list(properties = list( Hydrophobic = c("V", "I", "L", "F", "W", "Y", "M"), Small = c("P", "G", "A", "C"), @@ -63,7 +61,7 @@ Ape_AA <- list(properties = list( color=c("red", "yellow", "blue")) # Properties + Conservation (Clustal X) -Clustal_AA <- list(properties = list( +Clustal <- list(properties = list( Hydrophobic = c("A", "I", "L", "M", "F", "W", "V"), Positive = c("K", "R"), Negative = c("E", "D"), @@ -77,6 +75,13 @@ Clustal_AA <- list(properties = list( ) +Polarity <- list(properties = list( + "Non polar" = c("G", "A", "V", "L", "I", "F", "W", "M", "P"), + "Polar, uncharged" = c("S", "T", "C", "Y", "N", "Q"), + "Polar, acidic" = c("D", "E"), + "Polar, basic" = c("K", "R", "H")), + color = c("yellow", "green", "red", "blue")) + # Physicochemical Properties Zappo_AA <- list(properties = list( "Aliphatic/Hydrophobic" = c("I", "L", "V", "A", "M"), @@ -91,7 +96,7 @@ Zappo_AA <- list(properties = list( ) -Hydrophobicity_AA <- list(properties = list( +Transmembrane_tendency <- list(properties = list( Lys = "K", Asp = "D", Glu = "E", @@ -118,8 +123,6 @@ Hydrophobicity_AA <- list(properties = list( "#F1000D", "#FF0000")) - - # if rate g[i] is smaller than .gEps invariant site is increased by w[i] .gEps <- 1e-12 diff --git a/man/plotAnc.Rd b/man/plotAnc.Rd index 6b3aee59..d9269408 100644 --- a/man/plotAnc.Rd +++ b/man/plotAnc.Rd @@ -22,8 +22,9 @@ plotAnc(tree, data, i = 1, site.pattern = FALSE, col = NULL, \item{pos}{a character string defining the position of the legend.} -\item{scheme}{color scheme, for amino acids this can be "Ape_AA", -"Clustal_AA", "Hydrophobicity_AA" and "Zappo_AA" are available.} +\item{scheme}{a predefined color scheme. For amino acid options are "Ape_AA", +"Zappo_AA", "Clustal", "Polarity" and "Transmembrane_tendency", for +nucleotides "Ape_NT" and"RY_NT". Names can be abbreviated.} \item{\dots}{Further arguments passed to or from other methods.} } @@ -51,7 +52,7 @@ tree <- makeNodeLabel(tree) anc.ch <- ancestral.pars(tree, chloroplast) image(chloroplast[, 1:25]) plotAnc(tree, anc.ch, 21, scheme="Ape_AA") -plotAnc(tree, anc.ch, 21, scheme="Clustal_AA") +plotAnc(tree, anc.ch, 21, scheme="Clustal") } \seealso{ \code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}} From 858967dc22169ebb6f8f8c617197d59a4016ef4d Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 7 Sep 2023 14:10:17 +0200 Subject: [PATCH 065/216] avoid warning --- DESCRIPTION | 3 +-- R/zzz.R | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 687898f6..fcb213b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -66,8 +66,7 @@ Suggests: tinytest, xtable LinkingTo: - Rcpp, RcppArmadillo -Remotes: github::emmanuelparadis/ape + Rcpp VignetteBuilder: knitr, utils diff --git a/R/zzz.R b/R/zzz.R index 0db88383..0c8e79e0 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -43,7 +43,7 @@ .nucleotideAlphabet <- c("a", "c", "g", "t") Ape_NT <- list(properties = list( - a="a", g="g", c="c", t="t"), #"n", "-")), + a="a", g="g", c="c", t="t", n="n", "-"="-"), color=c("red", "yellow", "green", "blue", "grey", "black")) From cac141506e398f8d19f0879b3f41a6b561737184 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 13 Sep 2023 12:02:26 +0200 Subject: [PATCH 066/216] improve man page --- R/treeManipulation.R | 11 +++++++---- man/midpoint.Rd | 11 +++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/R/treeManipulation.R b/R/treeManipulation.R index 055b1c74..a2111c56 100644 --- a/R/treeManipulation.R +++ b/R/treeManipulation.R @@ -68,13 +68,16 @@ changeEdgeLength <- function(tree, edge, edge.length) { #' #' \code{midpoint} performs midpoint rooting of a tree. \code{pruneTree} #' produces a consensus tree. -#' #' \code{pruneTree} prunes back a tree and produces a consensus tree, for trees #' already containing nodelabels. It assumes that nodelabels are numerical or #' character that allows conversion to numerical, it uses -#' as.numeric(as.character(tree$node.labels)) to convert them. \code{midpoint} -#' so far does not transform node.labels properly. -#' +#' as.numeric(as.character(tree$node.labels)) to convert them. +#' \code{midpoint} by default assumes that node labels contain support values. +#' This works if support values are computed from splits, but should be +#' recomputed for clades. +#' \code{keep.as.tip} takes a list of tips and/or node labels and returns a tree +#' pruned to those. If node label, then it prunes all descendants of that node +#' until that internal node becomes a tip. #' @param tree an object of class \code{phylo}. #' @param FUN a function evaluated on the nodelabels, result must be logical. diff --git a/man/midpoint.Rd b/man/midpoint.Rd index 3341064a..762fc918 100644 --- a/man/midpoint.Rd +++ b/man/midpoint.Rd @@ -40,13 +40,16 @@ the root node. \description{ \code{midpoint} performs midpoint rooting of a tree. \code{pruneTree} produces a consensus tree. -} -\details{ \code{pruneTree} prunes back a tree and produces a consensus tree, for trees already containing nodelabels. It assumes that nodelabels are numerical or character that allows conversion to numerical, it uses -as.numeric(as.character(tree$node.labels)) to convert them. \code{midpoint} -so far does not transform node.labels properly. +as.numeric(as.character(tree$node.labels)) to convert them. +\code{midpoint} by default assumes that node labels contain support values. +This works if support values are computed from splits, but should be +recomputed for clades. +\code{keep.as.tip} takes a list of tips and/or node labels and returns a tree +pruned to those. If node label, then it prunes all descendants of that node +until that internal node becomes a tip. } \examples{ From 5adee6e54ea471c8d202dc0bace7a38689dcfce7 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 13 Sep 2023 16:55:50 +0200 Subject: [PATCH 067/216] improve plot.networx --- R/consensusNet.R | 5 +++-- R/networx.R | 12 +++++++----- man/plot.networx.Rd | 6 +++++- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/R/consensusNet.R b/R/consensusNet.R index 169d976d..6bf32f50 100644 --- a/R/consensusNet.R +++ b/R/consensusNet.R @@ -51,7 +51,8 @@ consensusNet <- function(obj, prob = 0.3, ...) { attr(spl, "confidences") <- (w / l)[ind] # attr(spl, "weights") = w[ind] res <- as.networx(spl) - res$edge.labels <- as.character(res$edge.length / l * 100) - res$edge.labels[res$edge[, 2] <= length(res$tip.label)] <- "" + res$edge.labels <- res$edge.length / l * 100 + #as.character(res$edge.length / l * 100) + res$edge.labels[res$edge[, 2] <= length(res$tip.label)] <- NA_real_ #"" reorder(res) } diff --git a/R/networx.R b/R/networx.R index 137df955..f9bf5141 100644 --- a/R/networx.R +++ b/R/networx.R @@ -646,6 +646,8 @@ rotate_matrix <- function(x, theta){ #' @param underscore a logical specifying whether the underscores in tip labels #' should be written as spaces (the default) or left as are (if TRUE). #' @param angle rotate the plot. +#' @param digits if edge labels are numerical a positive integer indicating how +#' many significant digits are to be used. #' @param \dots Further arguments passed to or from other methods. #' @rdname plot.networx #' @note The internal representation is likely to change. @@ -689,7 +691,7 @@ plot.networx <- function(x, type = "equal angle", use.edge.length = TRUE, cex.edge.label = cex, col.node.label = tip.color, col.edge.label = tip.color, font.node.label = font, font.edge.label = font, underscore = FALSE, - angle=0, ...) { + angle=0, digits=3, ...) { type <- match.arg(type, c("equal angle", "3D", "2D")) if (use.edge.length == FALSE){ x$edge.length[] <- 1 @@ -701,7 +703,7 @@ plot.networx <- function(x, type = "equal angle", use.edge.length = TRUE, if(!is.null(edge.label) && is.numeric(edge.label)) edge.label <- prettyNum(edge.label) if (is.null(edge.label) && !is.null(conf)) { conf <- conf[index] - if(is.numeric(conf)) conf <- prettyNum(conf) + if(is.numeric(conf)) conf <- prettyNum(format(conf, digits=digits)) if (!is.null(x$translate)) conf[match(x$translate$node, x$edge[, 2])] <- "" else conf[x$edge[, 2] <= nTips] <- "" edge.label <- conf @@ -893,9 +895,9 @@ plot2D <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, adj <- as.numeric(adj) ## `srt' takes only a single value, so can't vectorize this: ## (and need to 'elongate' these vectors:) - font <- rep(font, length.out = Ntip) - tip.color <- rep(tip.color, length.out = Ntip) - cex <- rep(cex, length.out = Ntip) + font <- rep(font, length.out = nTips) + tip.color <- rep(tip.color, length.out = nTips) + cex <- rep(cex, length.out = nTips) for (i in 1:length(label)) text(xx[i], yy[i], label[i], font = font[i], cex = cex[i], srt = angle[i], adj = adj[i], diff --git a/man/plot.networx.Rd b/man/plot.networx.Rd index 5f86bbc4..1ae7beb3 100644 --- a/man/plot.networx.Rd +++ b/man/plot.networx.Rd @@ -12,7 +12,8 @@ split.lty = NULL, font = 3, cex = par("cex"), cex.node.label = cex, cex.edge.label = cex, col.node.label = tip.color, col.edge.label = tip.color, font.node.label = font, - font.edge.label = font, underscore = FALSE, angle = 0, ...) + font.edge.label = font, underscore = FALSE, angle = 0, digits = 3, + ...) } \arguments{ \item{x}{an object of class \code{"networx"}} @@ -77,6 +78,9 @@ should be written as spaces (the default) or left as are (if TRUE).} \item{angle}{rotate the plot.} +\item{digits}{if edge labels are numerical a positive integer indicating how +many significant digits are to be used.} + \item{\dots}{Further arguments passed to or from other methods.} } \description{ From 7b4e8e0051c44fdc94d0277088a7cc6792c3d350 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 3 Oct 2023 19:09:53 +0200 Subject: [PATCH 068/216] bug fix if duplicated sequences exist --- R/parsimony.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/parsimony.R b/R/parsimony.R index 02deb276..a1a9c292 100644 --- a/R/parsimony.R +++ b/R/parsimony.R @@ -417,6 +417,7 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, trees <- optim.parsimony(p_trees, data, trace = trace, method = method, rearrangements = rearrangements, ...) } + curr_tree <- trees if(!is.null(attr(data, "duplicated"))){ p_trees <- addTaxa(p_trees, attr(data, "duplicated")) trees <- addTaxa(trees, attr(data, "duplicated")) @@ -430,7 +431,7 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, if ( (mp1 + eps) < mp) { kmax <- 1 result <- trees - tree <- trees + tree <- curr_tree hr <- hash(trees) mp <- mp1 } From abb6663db93a3df82d44dcaf218d997bd564f320 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 16 Oct 2023 15:10:52 +0200 Subject: [PATCH 069/216] improve examples --- R/pml_bb.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pml_bb.R b/R/pml_bb.R index ef3bfe83..2b7826a0 100644 --- a/R/pml_bb.R +++ b/R/pml_bb.R @@ -41,10 +41,10 @@ #' @keywords cluster #' @examples #' -#' \dontrun{ #' data(woodmouse) -#' tmp <- pml_bb(woodmouse) +#' tmp <- pml_bb(woodmouse, model="HKY+I", rearrangement="NNI") #' +#' \dontrun{ #' data(Laurasiatherian) #' mt <- modelTest(Laurasiatherian) #' fit <- pml_bb(mt) From dfef4ea47724d5986ccfd47b057a78fce750bb7e Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 16 Oct 2023 15:11:12 +0200 Subject: [PATCH 070/216] bug fix --- R/plotAnc.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/plotAnc.R b/R/plotAnc.R index 66ebad3c..49da06cd 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -89,7 +89,8 @@ plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, xrad <- CEX * diff(lastPP$x.lim * 1.1) / 50 par(new = TRUE) plot(tree, label.offset = 1.1 * xrad, plot = TRUE, ...) - if (is.null(col)) col <- hcl.colors(nc) #rainbow(nc) + if (is.null(col)) col <- hcl.colors(nc) #rainbow(nc) + if(!is.null(names(col))) col <- col[attr(data, "levels")] if (length(col) != nc) { warning("Length of color vector differs from number of levels!") } From 8647726fc31307046af9c7e1b911220ecf961b62 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 17 Oct 2023 17:48:55 +0200 Subject: [PATCH 071/216] add experimental function write.ancestral, interface might change --- R/ancestral_pml.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R index 0fa325ad..2c4b94f9 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral_pml.R @@ -428,3 +428,23 @@ makeAncNodeLabel <- function(tree, ...){ # ind <- get("last_plot.phylo", envir = .PlotPhyloEnv)$edge[, 2] # edgelabels(prettyNum(x[ind]), frame = "none") #} + + +write.ancestral <- function(x, file="", ...){ + l <- length(x) + nr <- attr(x, "nr") + nc <- attr(x, "nc") + index <- attr(x, "index") + nr <- length(index) + nam <- names(x) + X <- matrix(0, l*length(index), nc) + j <- 0 + for(i in seq_len(l)){ + X[(j+1):(j+nr), ] <- x[[i]][index, ] + j <- j + nr + } + res <- data.frame(Node=rep(nam, each=nr), X) + colnames(res) <- c("Node", attr(x, "levels")) + if (file == "") return(res) + else write.csv(res, file=file, ...) +} From 71dd002ae2d9465347079c82a5d8b5935e6ec240 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 17 Oct 2023 17:51:18 +0200 Subject: [PATCH 072/216] update man page --- man/pml_bb.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/pml_bb.Rd b/man/pml_bb.Rd index c099dd84..333386f2 100644 --- a/man/pml_bb.Rd +++ b/man/pml_bb.Rd @@ -56,10 +56,10 @@ Currently very experimental and likely to change. } \examples{ -\dontrun{ data(woodmouse) -tmp <- pml_bb(woodmouse) +tmp <- pml_bb(woodmouse, model="HKY+I", rearrangement="NNI") +\dontrun{ data(Laurasiatherian) mt <- modelTest(Laurasiatherian) fit <- pml_bb(mt) From aa8c9c65cf0f0d02a3e108770ae7ba7162d74ded Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 17 Oct 2023 18:31:27 +0200 Subject: [PATCH 073/216] add as.phyDat.AAStringset, as.phyDat.DNAStringset going via AAbin/DNAbin objects --- R/phyDat_conversion.R | 21 +++++++++++++++++++++ man/as.phyDat.Rd | 9 +++++++++ 2 files changed, 30 insertions(+) diff --git a/R/phyDat_conversion.R b/R/phyDat_conversion.R index 08e7f267..a98c046f 100644 --- a/R/phyDat_conversion.R +++ b/R/phyDat_conversion.R @@ -87,6 +87,11 @@ as.phyDat.factor <- function(x, ...){ #' @export as.phyDat.DNAbin <- function(x, ...) phyDat.DNA(x, ...) +#' @rdname as.phyDat +#' @method as.phyDat AAbin +#' @export +as.phyDat.AAbin <- function(x, ...) phyDat.AA(x, ...) + #' @rdname as.phyDat #' @method as.phyDat alignment @@ -137,6 +142,22 @@ as.phyDat.MultipleAlignment <- function(x, ...){ } +#' @rdname as.phyDat +#' @method as.phyDat AAStringSet +#' @export +as.phyDat.AAStringSet <- function(x, ...){ + as.AAbin(x, ...) |> as.phyDat() +} + + +#' @rdname as.phyDat +#' @method as.phyDat DNAStringSet +#' @export +as.phyDat.DNAStringSet <- function(x, ...){ + as.DNAbin(x, ...) |> as.phyDat() +} + + # @rdname phyDat #' @export as.MultipleAlignment <- function (x, ...){ diff --git a/man/as.phyDat.Rd b/man/as.phyDat.Rd index 18057479..9e22c7d9 100644 --- a/man/as.phyDat.Rd +++ b/man/as.phyDat.Rd @@ -12,9 +12,12 @@ \alias{as.phyDat} \alias{as.phyDat.factor} \alias{as.phyDat.DNAbin} +\alias{as.phyDat.AAbin} \alias{as.phyDat.alignment} \alias{phyDat2alignment} \alias{as.phyDat.MultipleAlignment} +\alias{as.phyDat.AAStringSet} +\alias{as.phyDat.DNAStringSet} \alias{as.character.phyDat} \alias{as.data.frame.phyDat} \alias{as.DNAbin.phyDat} @@ -30,12 +33,18 @@ as.phyDat(x, ...) \method{as.phyDat}{DNAbin}(x, ...) +\method{as.phyDat}{AAbin}(x, ...) + \method{as.phyDat}{alignment}(x, type = "DNA", ...) phyDat2alignment(x) \method{as.phyDat}{MultipleAlignment}(x, ...) +\method{as.phyDat}{AAStringSet}(x, ...) + +\method{as.phyDat}{DNAStringSet}(x, ...) + \method{as.MultipleAlignment}{phyDat}(x, ...) \method{as.character}{phyDat}(x, allLevels = TRUE, ...) From 44d350a8fddc04b26343cf9a6b7190984560f26d Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 20 Oct 2023 12:00:21 +0200 Subject: [PATCH 074/216] change scalebar and suport values with cex --- R/plot_pml.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/plot_pml.R b/R/plot_pml.R index 6e91ef7b..c1200d6e 100644 --- a/R/plot_pml.R +++ b/R/plot_pml.R @@ -39,6 +39,8 @@ plot.pml <- function(x, type="phylogram", direction = "rightwards", ...){ type <- match.arg(type, c("phylogram","cladogram", "fan", "unrooted", "radial", "tidy")) tree <- x$tree + extras <- match.call(expand.dots = FALSE)$... + cex <- ifelse(is.null(extras$cex), par("cex"), extras$cex) if(!is.rooted(tree) && (type != "unrooted") ) tree <- midpoint(tree) plot.phylo(tree, type=type, direction=direction, ...) if(is.rooted(tree) && (type %in% c("phylogram","cladogram"))){ @@ -54,8 +56,8 @@ plot.pml <- function(x, type="phylogram", direction = "rightwards", ...){ axisPhylo(side, root.time = root_time, backward = FALSE) } else if(x$method=="ultrametric") axisPhylo(side) - else add.scale.bar() + else add.scale.bar(cex=cex) } - else add.scale.bar() - if(!is.null(x$bs)) add_support(tree, x$bs) + else add.scale.bar(cex=cex) + if(!is.null(x$bs)) add_support(tree, x$bs, cex=cex) } From 5cacdbb3538cea76236865b7589668f303a70231 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 24 Oct 2023 09:43:43 +0200 Subject: [PATCH 075/216] set trace=0 by default --- R/bab.R | 2 +- man/bab.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/bab.R b/R/bab.R index 63d1f4d3..1f4bb31c 100644 --- a/R/bab.R +++ b/R/bab.R @@ -166,7 +166,7 @@ pBound <- function(x, UB, LB) { #' trees <- bab(gene12) #' #' @export bab -bab <- function(data, tree = NULL, trace = 1, ...) { +bab <- function(data, tree = NULL, trace = 0, ...) { if(inherits(data, "DNAbin") | inherits(data, "AAbin")) data <- as.phyDat(data) if (!inherits(data, "phyDat")) stop("data must be of class phyDat") if (!is.null(tree)) data <- subset(data, tree$tip.label) diff --git a/man/bab.Rd b/man/bab.Rd index 853233f1..923d0cf2 100644 --- a/man/bab.Rd +++ b/man/bab.Rd @@ -5,7 +5,7 @@ \alias{BranchAndBound} \title{Branch and bound for finding all most parsimonious trees} \usage{ -bab(data, tree = NULL, trace = 1, ...) +bab(data, tree = NULL, trace = 0, ...) } \arguments{ \item{data}{an object of class phyDat.} From 975d23a7659099076675d07c3e0bd36de173e9d8 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 14 Nov 2023 16:14:09 +0100 Subject: [PATCH 076/216] improve as.data.frame.ancestral --- R/ancestral_pml.R | 43 +++++++++++++------------------------------ 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R index 2c4b94f9..d0adf3d9 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral_pml.R @@ -200,18 +200,21 @@ as.phyDat.ancestral <- function(x, ...) { #' @export as.data.frame.ancestral <- function(x, ...) { stopifnot(inherits(x, "ancestral")) - lab <- names(x) - states <- attr(x, "levels") + l <- length(x) nr <- attr(x, "nr") nc <- attr(x, "nc") - pos <- seq_len(nr) - X <- unlist(x) |> array(c(nr, nc, length(x)), - dimnames = list(Site=pos, attr(x, "levels"), Node=names(x))) - z1 <- apply(X, 2L, c) - z2 <- expand.grid(dimnames(X)[c(1,3)]) - res <- data.frame(z2, z1) - res <- data.frame(z2, z1) - res[order(res[,1], res[,2]), ] + index <- attr(x, "index") + nr <- length(index) + nam <- names(x) + X <- matrix(0, l*length(index), nc) + j <- 0 + for(i in seq_len(l)){ + X[(j+1):(j+nr), ] <- x[[i]][index, ] + j <- j + nr + } + res <- data.frame(Site=rep(seq_len(nr), l), Node=rep(nam, each=nr), X) + colnames(res) <- c("Site", "Node", attr(x, "levels")) + res } @@ -428,23 +431,3 @@ makeAncNodeLabel <- function(tree, ...){ # ind <- get("last_plot.phylo", envir = .PlotPhyloEnv)$edge[, 2] # edgelabels(prettyNum(x[ind]), frame = "none") #} - - -write.ancestral <- function(x, file="", ...){ - l <- length(x) - nr <- attr(x, "nr") - nc <- attr(x, "nc") - index <- attr(x, "index") - nr <- length(index) - nam <- names(x) - X <- matrix(0, l*length(index), nc) - j <- 0 - for(i in seq_len(l)){ - X[(j+1):(j+nr), ] <- x[[i]][index, ] - j <- j + nr - } - res <- data.frame(Node=rep(nam, each=nr), X) - colnames(res) <- c("Node", attr(x, "levels")) - if (file == "") return(res) - else write.csv(res, file=file, ...) -} From ba3c44f2f0692635bef4c50b3dd65a114213f4a5 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 14 Nov 2023 16:14:31 +0100 Subject: [PATCH 077/216] simplify code --- R/candidate_tree.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/candidate_tree.R b/R/candidate_tree.R index 374831f5..2db4a653 100644 --- a/R/candidate_tree.R +++ b/R/candidate_tree.R @@ -45,11 +45,12 @@ candidate_tree <- function(x, method=c("unrooted", "ultrametric", "tipdated"), if(is.null(tip.dates)) stop("Argument tip.dates is missing!") if(is.null(names(tip.dates))) names(tip.dates) <- names(x) dm <- dist.ml(x, ...) - tree <- fastme.ols(dm) - tree <- rtt(tree, tip.dates[tree$tip.label]) - tree <- nnls.tree(dm, tree, method = "tipdated", - tip.dates=tip.dates[tree$tip.label]) - tree$tip.dates <- tip.dates[tree$tip.label] + tree <- supgma(dm, tip.dates) +# tree <- fastme.ols(dm) +# tree <- rtt(tree, tip.dates[tree$tip.label]) +# tree <- nnls.tree(dm, tree, method = "tipdated", +# tip.dates=tip.dates[tree$tip.label]) +# tree$tip.dates <- tip.dates[tree$tip.label] } minEdge(tree, tau=eps, enforce_ultrametric=enforce_ultrametric) } From 2a375c68631a9cefde895ccfd9e3ac15a1eba3e2 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 5 Dec 2023 15:14:06 +0100 Subject: [PATCH 078/216] fix documentation --- R/discrete.gamma.R | 2 +- man/discrete.gamma.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/discrete.gamma.R b/R/discrete.gamma.R index f4b5a204..25422071 100644 --- a/R/discrete.gamma.R +++ b/R/discrete.gamma.R @@ -31,7 +31,7 @@ #' @param \dots Further arguments passed to or from other methods. #' @return \code{discrete.gamma} returns a matrix. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{pml.fit}, \link{stepfun}, link{pgamma}, link{pbeta}}, +#' @seealso \code{\link{pml.fit}, \link{stepfun}, \link{pgamma}, \link{pbeta}}, #' @examples #' discrete.gamma(1, 4) #' diff --git a/man/discrete.gamma.Rd b/man/discrete.gamma.Rd index 877e9061..a1f889e9 100644 --- a/man/discrete.gamma.Rd +++ b/man/discrete.gamma.Rd @@ -87,7 +87,7 @@ par(old.par) } \seealso{ -\code{\link{pml.fit}, \link{stepfun}, link{pgamma}, link{pbeta}}, +\code{\link{pml.fit}, \link{stepfun}, \link{pgamma}, \link{pbeta}}, } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} From ea381f1b1014c4f2d29dc4146689052cf0511f4e Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 5 Dec 2023 15:24:36 +0100 Subject: [PATCH 079/216] more ratchet iterations --- DESCRIPTION | 2 +- R/pml_control.R | 2 +- man/pml.control.Rd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fcb213b0..421888da 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: phangorn Title: Phylogenetic Reconstruction and Analysis -Version: 2.12.0.900 +Version: 3.0.0 Authors@R: c(person("Klaus", "Schliep", role = c("aut", "cre"), email = "klaus.schliep@gmail.com", diff --git a/R/pml_control.R b/R/pml_control.R index 2174236f..cb8597a0 100644 --- a/R/pml_control.R +++ b/R/pml_control.R @@ -72,7 +72,7 @@ pml.control <- function(epsilon = 1e-08, maxit = 10, trace = 1, tau = 1e-8, #' @rdname pml.control #' @export -ratchet.control <- function(iter = 20L, maxit = 200L, minit = 50L, prop = 1/2, +ratchet.control <- function(iter = 20L, maxit = 200L, minit = 100L, prop = 1/2, rell = TRUE, bs=1000L){ if (!is.numeric(maxit) || maxit <= 0) stop("maximum number of iterations must be > 0") diff --git a/man/pml.control.Rd b/man/pml.control.Rd index 5b3d77ca..faaf81a7 100644 --- a/man/pml.control.Rd +++ b/man/pml.control.Rd @@ -8,7 +8,7 @@ pml.control(epsilon = 1e-08, maxit = 10, trace = 1, tau = 1e-08, statefreq = "empirical") -ratchet.control(iter = 20L, maxit = 200L, minit = 50L, prop = 1/2, +ratchet.control(iter = 20L, maxit = 200L, minit = 100L, prop = 1/2, rell = TRUE, bs = 1000L) } \arguments{ From f001d4804febe799cb86967158ac856ad7b30342 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 13 Dec 2023 18:24:19 +0100 Subject: [PATCH 080/216] several bug fies and improvements --- R/ancestral_pml.R | 65 ++++++++++++++--------------------------------- 1 file changed, 19 insertions(+), 46 deletions(-) diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R index d0adf3d9..0b2958ce 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral_pml.R @@ -88,9 +88,6 @@ ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { x <- attributes(data) label <- makeAncNodeLabel(tree, ...) -# label <- as.character(1:m) -# nam <- tree$tip.label -# label[seq_along(nam)] <- nam x[["names"]] <- label tmp <- length(data) @@ -113,14 +110,11 @@ ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { contrast <- attr(data, "contrast") # proper format eps <- 1.0e-5 - ind1 <- which(apply(contrast, 1, function(x) sum(x > eps)) == 1L) - ind2 <- which(contrast[ind1, ] > eps, arr.ind = TRUE) - - pos <- ind2[match(seq_len(ncol(contrast)), ind2[, 2]), 1] + attr <- attributes(data) + pos <- match(attr$levels, attr$allLevels) nco <- as.integer(dim(contrast)[1]) for (i in 1:l) dat[i, (nTips + 1):m] <- .Call('LogLik2', data, P[i, ], nr, nc, node, edge, nTips, mNodes, contrast, nco) - parent <- tree$edge[, 1] child <- tree$edge[, 2] nTips <- min(parent) - 1 @@ -161,16 +155,11 @@ ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { } -# joint_reconstruction <- function(object){ -# -# } - #' @rdname ancestral.pml #' @export as.phyDat.ancestral <- function(x, ...) { type <- attr(x, "type") - # else res[1:ntips] <- data[1:ntips] fun2 <- function(x) { x <- p2dna(x) fitchCoding2ambiguous(x) @@ -181,14 +170,9 @@ as.phyDat.ancestral <- function(x, ...) { else { eps <- 1.0e-5 contr <- attr(x, "contrast") - # a bit too complicated - ind1 <- which(apply(contr, 1, function(x) sum(x > eps)) == 1L) - ind2 <- which(contr[ind1, ] > eps, arr.ind = TRUE) - # pos <- ind2[match(as.integer(1L:ncol(contr)), ind2[,2]),1] - pos <- ind2[match(seq_len(ncol(contr)), ind2[, 2]), 1] - # only first hit + attr <- attributes(x) + pos <- match(attr$levels, attr$allLevels) res <- lapply(x, function(x, pos) pos[max.col(x)], pos) - } attributes(res) <- attributes(x) class(res) <- "phyDat" @@ -283,9 +267,6 @@ mpr <- function(tree, data, cost = NULL, return = "prob", ...) { l <- length(tree$tip.label) m <- length(res) label <- makeAncNodeLabel(tree, ...) -# label <- as.character(1:m) -# nam <- tree$tip.label -# label[seq_along(nam)] <- nam att[["names"]] <- label ntips <- length(tree$tip.label) contrast <- att$contrast @@ -301,7 +282,7 @@ mpr <- function(tree, data, cost = NULL, return = "prob", ...) { for (i in (ntips + 1):m) res[[i]][] <- as.numeric(res[[i]] < RM) if (return == "prob") { # for(i in 1:ntips) res[[i]] <- contrast[data[[i]],,drop=FALSE] - if (return == "prob") res <- lapply(res, fun) + res <- lapply(res, fun) attributes(res) <- att class(res) <- c("ancestral", "phyDat") } @@ -316,6 +297,7 @@ mpr <- function(tree, data, cost = NULL, return = "prob", ...) { attributes(res) <- att } else { + attributes(res) <- att res <- as.phyDat.ancestral(res) } res[1:ntips] <- data @@ -339,7 +321,7 @@ acctran2 <- function(tree, data) { f$traverse(edge) if(length(tmp)>0)f$acctran_traverse(tmp) psc <- f$pscore_acctran(edge) - el <- psc #[edge[,2]] + el <- psc parent <- unique(edge[,1]) desc <- Descendants(tree, parent, "children") for(i in seq_along(parent)){ @@ -385,9 +367,15 @@ ptree <- function(tree, data, return = "prob", acctran=TRUE, ...) { if(length(tmp)>0 && acctran==TRUE)f$acctran_traverse(tmp) res <- vector("list", m) att$names <- makeAncNodeLabel(tree, ...) -# att$names <- c(att$names, as.character((nTip+1):m)) - - if(return == "prob") { + if(type=="DNA" && return != "prob"){ + indx <- c(1, 2, 6, 3, 7, 9, 12, 4, 8, 10, 13, 11, 14, 15, 16) + res[1:nTip] <- data[1:nTip] + for(i in (nTip+1):m) + res[[i]] <- indx[f$getAncAmb(i)[1:nr]] + attributes(res) <- att + return(res) + } + else { fun <- function(X) { rs <- rowSums(X) X / rs @@ -399,17 +387,10 @@ ptree <- function(tree, data, return = "prob", acctran=TRUE, ...) { attributes(res) <- att class(res) <- c("ancestral", "phyDat") } - else { - if(type=="DNA"){ - indx <- c(1, 2, 6, 3, 7, 9, 12, 4, 8, 10, 13, 11, 14, 15, 16) - res[1:nTip] <- data[1:nTip] - for(i in (nTip+1):m) - res[[i]] <- indx[f$getAncAmb(i)[1:nr]] - attributes(res) <- att - } - else stop("This is only for nucleotide sequences supported so far") + if(return != "prob"){ + res <- as.phyDat.ancestral(res) + class(res) <- "phyDat" } -# attributes(res) <- att res } @@ -423,11 +404,3 @@ makeAncNodeLabel <- function(tree, ...){ tree <- makeNodeLabel(tree, ...) c(tree$tip.label, tree$node.label) } - -#parsimony.plot <- function(tree, ...) { -# x <- numeric(max(tree$edge)) -# x[tree$edge[, 2]] <- tree$edge.length -# plot(tree, ...) -# ind <- get("last_plot.phylo", envir = .PlotPhyloEnv)$edge[, 2] -# edgelabels(prettyNum(x[ind]), frame = "none") -#} From 486af8becf5f270022db364df653f61989c2815b Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 18 Dec 2023 10:29:34 +0100 Subject: [PATCH 081/216] improve man page --- R/parsimony.R | 15 +++++++++------ man/parsimony.Rd | 12 +++++++----- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/R/parsimony.R b/R/parsimony.R index a1a9c292..c22e75c2 100644 --- a/R/parsimony.R +++ b/R/parsimony.R @@ -1,13 +1,16 @@ #' Parsimony tree. #' +#' \code{pratchet} implements the parsimony ratchet (Nixon, 1999) and is the +#' preferred way to search for the best tree. For small trees function +#' \code{\link{bab}} can be used to compute all most parsimonious trees. #' #' \code{parsimony} returns the parsimony score of a tree using either the -#' sankoff or the fitch algorithm. \code{optim.parsimony} tries to find the -#' maximum parsimony tree using either Nearest Neighbor Interchange (NNI) -#' rearrangements or sub tree pruning and regrafting (SPR). \code{pratchet} -#' implements the parsimony ratchet (Nixon, 1999) and is the preferred way to -#' search for the best tree. \code{random.addition} can be used to produce -#' starting trees. +#' sankoff or the fitch algorithm. +#' \code{optim.parsimony} optimizes the topology using either Nearest Neighbor +#' Interchange (NNI) rearrangements or sub tree pruning and regrafting (SPR) and +#' is used inside \code{pratchet}. \code{random.addition} can be used to produce +#' starting trees and is an option for the argument perturbation in +#' \code{pratchet}. #' #' The "SPR" rearrangements are so far only available for the "fitch" method, #' "sankoff" only uses "NNI". The "fitch" algorithm only works correct for diff --git a/man/parsimony.Rd b/man/parsimony.Rd index e925456c..5e7163c0 100644 --- a/man/parsimony.Rd +++ b/man/parsimony.Rd @@ -67,12 +67,14 @@ found during the search. \code{acctran} returns a tree with edge length according to the ACCTRAN criterion. } \description{ +\code{pratchet} implements the parsimony ratchet (Nixon, 1999) and is the +preferred way to search for the best tree. For small trees \code{\link{bab}} +to compute all most parsimonious trees \code{parsimony} returns the parsimony score of a tree using either the -sankoff or the fitch algorithm. \code{optim.parsimony} tries to find the -maximum parsimony tree using either Nearest Neighbor Interchange (NNI) -rearrangements or sub tree pruning and regrafting (SPR). \code{pratchet} -implements the parsimony ratchet (Nixon, 1999) and is the preferred way to -search for the best tree. \code{random.addition} can be used to produce +sankoff or the fitch algorithm. +\code{optim.parsimony} optimizes the topology using either Nearest Neighbor +Interchange (NNI) rearrangements or sub tree pruning and regrafting (SPR) and +is used inside \code{pratchet}. \code{random.addition} can be used to produce starting trees. } \details{ From 1d6557503f8c9897b80ad608f5cd9b8848c21cd4 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 18 Dec 2023 11:42:07 +0100 Subject: [PATCH 082/216] cleaning up, improve ratchet --- R/phylo.R | 107 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 44 deletions(-) diff --git a/R/phylo.R b/R/phylo.R index f0ed3511..3acfecb4 100644 --- a/R/phylo.R +++ b/R/phylo.R @@ -123,7 +123,7 @@ subsChoice_USER <- function(type = .usermodels, nstates) { optimGamma <- function(tree, data, shape = 1, k = 4, ...) { fn <- function(shape, tree, data, k, ...) pml.fit(tree, data, shape = shape, k = k, ...) - res <- optimize(f = fn, interval = c(0.1, 100), lower = 0.1, upper = 1000, + res <- optimize(f = fn, interval = c(0.1, 100), lower = 0.1, upper = 100, maximum = TRUE, tol = .01, tree = tree, data = data, k = k, ...) res } @@ -1479,14 +1479,14 @@ optimRooted <- function(tree, data, bf, g, w, eig, ll.0, loglik } # scale whole tree - scaleEdges <- function(tree, data, tau = 1e-8, ...) { #t = 1, trace = 0, + scaleEdges <- function(tree, data, tau = 1e-8, ...) { fn <- function(t, tree, data, ...) { tree$edge.length <- tree$edge.length * t pml.fit4(tree, data, ...) } min_scaler <- max(.25, tau / min(tree$edge.length) ) min_scaler <- min(min_scaler, 1) - if(min_scaler>1) browser() +# if(min_scaler>1) browser() optimize(f = fn, interval = c(min_scaler, 4), tree = tree, data = data, ..., maximum = TRUE, tol = .00001) } @@ -2002,6 +2002,7 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, optNni <- ifelse(rearrangement == "none", FALSE, TRUE) perturbation <- ifelse(rearrangement %in% c("ratchet", "stochastic", "multi2di"), TRUE, FALSE) + if(rearrangement=="ratchet") fbs <- vector("list", ratchet.par$minit) extras <- match.call(expand.dots = FALSE)$... pmla <- c("wMix", "llMix") wMix <- object$wMix @@ -2241,6 +2242,11 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, spl <- as.splits(bs) object$tree <- addConfidences(object$tree, spl) } + if(rearrangement=="ratchet"){ + class(fbs) <- "multiPhylo" + object$abs <- object$bs + object$bs <- fbs + } pml.free() return(object) }) @@ -2343,35 +2349,40 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, } if (optFreeRate) { # bis jetzt w nicht optimiert! - tmp_ll <- ll - res <- optimFreeRate(tree, data, g = g, k = k, w = w, inv = inv, + tmp_ll <- ll_fr <- ll + eps_fr <- 1e8 + iter_fr <- 0 + while(eps_fr > control$epsilon & iter_fr < 3){ + res <- optimFreeRate(tree, data, g = g, k = k, w = w, inv = inv, INV = INV, bf = bf, eig = eig, ll.0 = ll.0, rate = rate) - scale <- function(tree, g, w){ - blub <- sum(g * w) - g <- g / blub - tree$edge.length <- tree$edge.length * blub - list(tree=tree, g=g) - } - if(res[[2]] > ll){ - tmp_sc <- scale(tree, res[[1]], w) - g0 <- res[[1]] - blub <- sum(g0 * w) - g <- g0 / blub - tree$edge.length <- tree$edge.length * blub -## if (trace > 0) cat("optimize free rate parameters: ", ll, "-->", -## max(res[[2]], ll), "\n") - ll <- res[[2]] - } - res2 <- optimWs(tree, data, w = w, g=g, inv = inv, - INV = INV, bf = bf, eig = eig, - ll.0 = ll.0, rate = rate) - if(res2[[2]] > ll){ - w <- res2[[1]] - blub <- sum(g * w) - g <- g / blub - tree$edge.length <- tree$edge.length * blub - ll <- res2[[2]] + scale <- function(tree, g, w){ + blub <- sum(g * w) + g <- g / blub + tree$edge.length <- tree$edge.length * blub + list(tree=tree, g=g) + } + if(res[[2]] > ll){ + tmp_sc <- scale(tree, res[[1]], w) + g0 <- res[[1]] + blub <- sum(g0 * w) + g <- g0 / blub + tree$edge.length <- tree$edge.length * blub + ll <- res[[2]] + } + res2 <- optimWs(tree, data, w = w, g=g, inv = inv, + INV = INV, bf = bf, eig = eig, + ll.0 = ll.0, rate = rate) + if(res2[[2]] > ll){ + w <- res2[[1]] + blub <- sum(g * w) + g <- g / blub + tree$edge.length <- tree$edge.length * blub + ll <- res2[[2]] + } + eps_fr <- ll - ll_fr + ll_fr <- ll + iter_fr <- iter_fr+1 } if (trace > 0) cat("optimize free rate parameters: ", tmp_ll, "-->", ll, "\n") @@ -2410,6 +2421,16 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, if((rearrangement == "stochastic" || rearrangement == "ratchet") && optRooted){ dm <- dist.ml(data, bf=bf, Q=Q, exclude = "pairwise") } + ratchet_fun <- function(tree, data, ...){ + weight <- attr(data, "weight") + v <- rep(seq_along(weight), weight) + attr(data, "weight") <- tabulate(sample(v, replace = TRUE), length(weight)) + + res <- opt_Edge(tree, data, ...) + ll2 <- res[[2]] + opt_nni(tree, data, iter_max=5, trace=0, ll=ll2, ...)$tree +# opt_nni(tree, data, ...)$tree + } for(i in seq_len(maxit)){ if(rearrangement == "stochastic"){ tree2 <- di2multi(tree, tol = 10 * tau, tip2root = TRUE) @@ -2426,11 +2447,17 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, tree2 <- minEdge(tree2, 10*tau) } } else if(rearrangement == "ratchet"){ - tree2 <- bootstrap.phyDat(data, candidate_tree, bs = 1, method=method, - eps = tau, bf = bf, Q = Q, k = k, shape = shape, - tip.dates=tip.dates)[[1]] - tree2 <- checkLabels(tree2, tree$tip.label) - tree2 <- reorder(tree2, "postorder") + tree2 <- ratchet_fun(tree, data, rooted=optRooted, w = w, g = g, + eig = eig, bf = bf, inv=inv, + rate=rate, ll.0 = ll.0, INV = INV, llMix = llMix, + wMix=wMix, ASC=ASC, + control=list(eps=1e-08, maxit=3, trace=trace-1, tau=tau)) + fbs[[i]] <- tree2 +# tree2 <- bootstrap.phyDat(data, candidate_tree, bs = 1, method=method, +# eps = tau, bf = bf, Q = Q, k = k, shape = shape, +# tip.dates=tip.dates)[[1]] +# tree2 <- checkLabels(tree2, tree$tip.label) +# tree2 <- reorder(tree2, "postorder") } else if(rearrangement == "multi2di"){ tree2 <- di2multi(tree, tol=10*tau, tip2root=TRUE) if(any(degree(tree2)>4)){ @@ -2595,8 +2622,6 @@ optimQuartet <- function(tree, data, eig, w, g, bf, rate, ll.0, nTips, tau <- control$tau lg <- k ScaleEPS <- 1.0 / 4294967296.0 - # anc <- Ancestors(tree, 1:m, "parent") - # anc0 <- as.integer(c(0L, anc)) while (eps > control$eps && iter < control$maxit) { EL <- .Call("optQrtt", as.integer(parent), as.integer(child), eig, evi, @@ -2630,10 +2655,6 @@ pml.quartet <- function(tree, data, bf = rep(.25, 4), k = 1, rate = 1, g, w, if (is.null(ll.0)) { ll.0 <- numeric(nr) } -# if (is.null(ind.ll0)) { -# ind <- which(ll.0 > 0) -# } -# else ind <- ind.ll0 node <- as.integer(tree$edge[, 1] - nTips - 1L) # min(node)) edge <- as.integer(tree$edge[, 2] - 1L) @@ -2704,7 +2725,6 @@ pml.nni <- function(tree, data, w, g, eig, bf, ll.0, ll, inv, wMix, llMix, tree0 <- index2tree(INDEX[i, ], tree, nTips + 1L) ch <- ei[5] pa <- ei[6] - # move up while (pa != loli) { tmpr <- match(loli, INDEX[, 5]) @@ -2736,7 +2756,6 @@ pml.nni <- function(tree, data, w, g, eig, bf, ll.0, ll, inv, wMix, llMix, ll.0 = ll.0, nTips = nTips, weight = weight, nr = nr, nc = nc, contrast = contrast, nco = nco, inv=inv, llcomp = ll + 1e-8, wMix=wMix, llMix=llMix, ...) - # new0$logLik+1e-8) new2 <- optimQuartet(tree2, data, eig = eig, w = w, g = g, bf = bf, ll.0 = ll.0, nTips = nTips, weight = weight, nr = nr, nc = nc, contrast = contrast, nco = nco, inv=inv, @@ -2825,7 +2844,7 @@ opt_nni <- function(tree, data, rooted, iter_max, trace, ll, RELL=NULL, ...){ } if(!is.null(RELL)) RELL <- tmp$RELL ll2 <- res$logLik - if(length(ll2)==0) browser() +# if(length(ll2)==0) browser() if(ll2 > (ll + 1e-8)) # epsR tree <- res$tree else { From 98e09d75fb2a5625426cf177eee7598138209138 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 11 Jan 2024 12:06:58 +0100 Subject: [PATCH 083/216] bug fix reported by John Rhodes --- R/distTree.R | 152 +++++++++++++++++++++++++--------------------- R/networx.R | 2 +- man/designTree.Rd | 14 +++-- 3 files changed, 94 insertions(+), 74 deletions(-) diff --git a/R/distTree.R b/R/distTree.R index e82fdda5..2774bb19 100644 --- a/R/distTree.R +++ b/R/distTree.R @@ -1,48 +1,3 @@ -#' UPGMA and WPGMA -#' -#' UPGMA and WPGMA clustering. Just a wrapper function around -#' \code{\link[stats]{hclust}}. -#' -#' -#' @param D A distance matrix. -#' @param method The agglomeration method to be used. This should be (an -#' unambiguous abbreviation of) one of "ward", "single", "complete", "average", -#' "mcquitty", "median" or "centroid". The default is "average". -#' @param \dots Further arguments passed to or from other methods. -#' @return A phylogenetic tree of class \code{phylo}. -#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{hclust}}, \code{\link{dist.hamming}}, \code{\link{NJ}}, -#' \code{\link{as.phylo}}, \code{\link{fastme}}, \code{\link{nnls.tree}} -#' @keywords cluster -#' @examples -#' -#' data(Laurasiatherian) -#' dm <- dist.ml(Laurasiatherian) -#' tree <- upgma(dm) -#' plot(tree) -#' -#' @rdname upgma -#' @export -"upgma" <- function(D, method = "average", ...) { - DD <- as.dist(D) - hc <- hclust(DD, method = method, ...) - result <- as.phylo(hc) - result <- reorder(result, "postorder") - result -} - - -#' @rdname upgma -#' @export -"wpgma" <- function(D, method = "mcquitty", ...) { - DD <- as.dist(D) - hc <- hclust(DD, method = method, ...) - result <- as.phylo(hc) - result <- reorder(result, "postorder") - result -} - - #' Neighbor-Joining #' #' This function performs the neighbor-joining tree estimation of Saitou and @@ -157,7 +112,12 @@ UNJ <- function(x){ #' Weighted least squares is used with weights w, i.e., sum(w * e^2) is #' minimized. #' @param balanced use weights as in balanced fastME -#' @param tip.dates a vector of sampling times associated to the tips of tree. +#' @param tip.dates a named vector of sampling times associated to the tips of +#' the tree. +#' @param calibration a named vector of calibration times associated to nodes of +#' the tree. +#' @param eps minimum edge length (default s 1e-8). +## @param strict strict calibration. #' @return \code{nnls.tree} return a tree, i.e. an object of class #' \code{phylo}. \code{designTree} and \code{designSplits} a matrix, possibly #' sparse. @@ -181,10 +141,11 @@ UNJ <- function(x){ #' @rdname designTree #' @export designTree <- function(tree, method = "unrooted", sparse = FALSE, - tip.dates=NULL, ...) { + tip.dates=NULL, calibration=NULL, ...) { # , strict=TRUE method <- match.arg(method, c("unrooted", "ultrametric", "rooted", "tipdated")) if(method == "rooted") method <- "ultrametric" + if(has.singles(tree)) tree <- collapse.singles(tree) #if (!is.na(pmatch(method, "all"))) # method <- "unrooted" #METHOD <- c("unrooted", "rooted", "tipdated") @@ -230,9 +191,10 @@ designUnrooted <- function(tree, sparse=FALSE, order = NULL) { } -designUltra <- function(tree, sparse = TRUE) { +designUltra <- function(tree, sparse = TRUE, calibration=NULL) { if (is.null(attr(tree, "order")) || attr(tree, "order") != "postorder") tree <- reorder(tree, "postorder") +# stopifnot( !(!is.null(calibration) && is.null(tree$node.label))) leri <- allChildren(tree) bp <- bip(tree) n <- length(tree$tip.label) @@ -342,7 +304,10 @@ designUnrooted2 <- function(tree, sparse = TRUE) { } -designTipDated <- function(tree, tip.dates, sparse = TRUE){ +designTipDated <- function(tree, tip.dates, sparse=TRUE){ + #, strict=TRUE + #if(!is.numeric(tip.dates)) browser() + #if(!length(tip.dates) >= Ntip(tree)) browser() stopifnot(is.numeric(tip.dates), length(tip.dates) >= Ntip(tree)) nTip <- Ntip(tree) tmp <- function(n){ @@ -363,11 +328,56 @@ designTipDated <- function(tree, tip.dates, sparse = TRUE){ } +designCalibrated <- function(tree, sparse=TRUE, calibration=NULL){ + #, tip.dates=NULL, strict=TRUE + #stopifnot(is.numeric(tip.dates), length(tip.dates) >= Ntip(tree)) + nTip <- Ntip(tree) + #if(!is.null(tree$node.label)) + cname <- tree$node.label +# nodes <- integer(tree$Nnode) + X <- designUltra(tree, sparse=sparse) + #if(!is.null(tree$node.label)) + colnames(X) <- cname + x <- X[, names(calibration), drop=FALSE] %*% calibration + X <- cbind(X[,-match(names(calibration), cname)], rate=x) +# nodes <- attr(X, "nodes") +# X <- cbind(X, x) +# colnames(X) <- c(nodes, -1) +# attr(X, "nodes") <- nodes + X +} + + +designConstrained <- function(tree, sparse=TRUE, tip.dates=NULL, + calibration=NULL){ + stopifnot(is.numeric(tip.dates), length(tip.dates) >= Ntip(tree)) + X <- designUltra(tree, sparse=sparse) + nTip <- Ntip(tree) + # designTipDated + if(!is.null(tip.dates)){ + tmp <- function(n){ + x1 <- rep(seq_len(n), each=n) + x2 <- rep(seq_len(n), n) + ind <- x1 < x2 + sparseMatrix(i = rep(seq_len(sum(ind)), 2), j = c(x1[ind], x2[ind])) + } + } + if(!is.null(calibration)){ + cname <- tree$node.label + colnames(X) <- cname + x <- X[, names(calibration), drop=FALSE] %*% calibration + X <- cbind(X[,-match(names(calibration), cname)], rate=x) + } + + +} + #' @rdname designTree #' @export nnls.tree <- function(dm, tree, method=c("unrooted", "ultrametric", "tipdated"), rooted=NULL, trace=1, weight=NULL, balanced=FALSE, tip.dates=NULL) { method <- match.arg(method, c("unrooted", "ultrametric", "tipdated")) + if(has.singles(tree)) tree <- collapse.singles(tree) if (is.rooted(tree) && method == "unrooted") tree <- unroot(tree) tree <- reorder(tree, "postorder") if (balanced) { @@ -423,7 +433,10 @@ nnls.tree <- function(dm, tree, method=c("unrooted", "ultrametric", "tipdated"), RSS <- sum((y - (X %*% betahattmp))^2) if (trace > 1) print(paste("RSS:", RSS)) attr(tree, "RSS") <- RSS - if(method=="tipdated") betahat <- betahat / rate + if(method=="tipdated"){ + betahat <- betahat / rate + attr(tree, "rate") <- rate + } tree$edge.length <- betahat return(tree) } @@ -496,7 +509,7 @@ nnls.phylo <- function(x, dm, method = "unrooted", trace = 0, ...) { #' @rdname designTree #' @export -nnls.splits <- function(x, dm, trace = 0) { +nnls.splits <- function(x, dm, trace = 0, eps = 1e-8) { labels <- attr(x, "labels") dm <- as.matrix(dm) k <- dim(dm)[1] @@ -519,23 +532,24 @@ nnls.splits <- function(x, dm, trace = 0) { dvec <- crossprod(X, y) betahat <- as.vector(solve(Dmat, dvec)) - if (!any(betahat < 0)) { - RSS <- sum((y - (X %*% betahat))^2) - if (trace > 1) print(paste("RSS:", RSS)) - attr(x, "RSS") <- RSS - attr(x, "weights") <- betahat - return(x) - } - n <- dim(X)[2] +# if (!any(betahat < 0)) { +# RSS <- sum((y - (X %*% betahat))^2) +# if (trace > 1) print(paste("RSS:", RSS)) +# attr(x, "RSS") <- RSS +# attr(x, "weights") <- betahat +# return(x) +# } int <- lengths(x) - - # quadratic programing - Amat <- matrix(1, 1, n) - Aind <- matrix(0L, 2L, n) - Aind[1, ] <- 1L - Aind[2, ] <- as.integer(1L:n) - betahat <- quadprog::solve.QP.compact(as.matrix(Dmat), as.vector(dvec), Amat, - Aind)$sol + if (any(betahat < 0)) { + n <- dim(X)[2] + # quadratic programing + Amat <- matrix(1, 1, n) + Aind <- matrix(0L, 2L, n) + Aind[1, ] <- 1L + Aind[2, ] <- as.integer(1L:n) + betahat <- quadprog::solve.QP.compact(as.matrix(Dmat), as.vector(dvec), + Amat, Aind)$sol + } RSS <- sum((y - (X %*% betahat))^2) ind <- (betahat > 1e-8) | int == 1 x <- x[ind] @@ -548,10 +562,10 @@ nnls.splits <- function(x, dm, trace = 0) { #' @rdname designTree #' @export -nnls.networx <- function(x, dm) { +nnls.networx <- function(x, dm, eps = 1e-8) { # spl <- attr(x, "splits") spl <- x$splits - spl2 <- nnls.splits(spl, dm) + spl2 <- nnls.splits(spl, dm, eps=eps) weight <- attr(spl, "weight") weight[] <- 0 weight[match(spl2, spl)] <- attr(spl2, "weight") diff --git a/R/networx.R b/R/networx.R index f9bf5141..887cdcb9 100644 --- a/R/networx.R +++ b/R/networx.R @@ -156,7 +156,7 @@ circNetwork <- function(x, ord = NULL) { X <- as.matrix(x)[, ord] Y <- X rsY <- rowSums(Y) - X <- X[ind, ] + X <- X[ind, , drop=FALSE] for (k in seq_along(ind)) { Vstart <- ord[1] diff --git a/man/designTree.Rd b/man/designTree.Rd index c14e809c..30a62297 100644 --- a/man/designTree.Rd +++ b/man/designTree.Rd @@ -10,7 +10,7 @@ \title{Compute a design matrix or non-negative LS} \usage{ designTree(tree, method = "unrooted", sparse = FALSE, tip.dates = NULL, - ...) + calibration = NULL, ...) nnls.tree(dm, tree, method = c("unrooted", "ultrametric", "tipdated"), rooted = NULL, trace = 1, weight = NULL, balanced = FALSE, @@ -18,9 +18,9 @@ nnls.tree(dm, tree, method = c("unrooted", "ultrametric", "tipdated"), nnls.phylo(x, dm, method = "unrooted", trace = 0, ...) -nnls.splits(x, dm, trace = 0) +nnls.splits(x, dm, trace = 0, eps = 1e-08) -nnls.networx(x, dm) +nnls.networx(x, dm, eps = 1e-08) designSplits(x, splits = "all", ...) } @@ -31,7 +31,11 @@ designSplits(x, splits = "all", ...) \item{sparse}{return a sparse design matrix.} -\item{tip.dates}{a vector of sampling times associated to the tips of tree.} +\item{tip.dates}{a named vector of sampling times associated to the tips of +the tree.} + +\item{calibration}{a named vector of calibration times associated to nodes of +the tree.} \item{\dots}{further arguments, passed to other methods.} @@ -49,6 +53,8 @@ minimized.} \item{x}{number of taxa.} +\item{eps}{minimum edge length (default s 1e-8).} + \item{splits}{one of "all", "star".} } \value{ From 5709e0b59e4f9a35f03ae17c26f771378b721f13 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 12 Jan 2024 13:16:44 +0100 Subject: [PATCH 084/216] Improve man page --- R/parsimony.R | 5 +++-- man/parsimony.Rd | 13 ++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/R/parsimony.R b/R/parsimony.R index c22e75c2..6c40abf4 100644 --- a/R/parsimony.R +++ b/R/parsimony.R @@ -1,8 +1,9 @@ #' Parsimony tree. #' #' \code{pratchet} implements the parsimony ratchet (Nixon, 1999) and is the -#' preferred way to search for the best tree. For small trees function -#' \code{\link{bab}} can be used to compute all most parsimonious trees. +#' preferred way to search for the best parsimony tree. For small number of taxa +#' the function \code{\link{bab}} can be used to compute all most parsimonious +#' trees. #' #' \code{parsimony} returns the parsimony score of a tree using either the #' sankoff or the fitch algorithm. diff --git a/man/parsimony.Rd b/man/parsimony.Rd index 5e7163c0..714d415e 100644 --- a/man/parsimony.Rd +++ b/man/parsimony.Rd @@ -68,16 +68,19 @@ according to the ACCTRAN criterion. } \description{ \code{pratchet} implements the parsimony ratchet (Nixon, 1999) and is the -preferred way to search for the best tree. For small trees \code{\link{bab}} -to compute all most parsimonious trees +preferred way to search for the best parsimony tree. For small number of taxa +the function \code{\link{bab}} can be used to compute all most parsimonious +trees. +} +\details{ \code{parsimony} returns the parsimony score of a tree using either the sankoff or the fitch algorithm. \code{optim.parsimony} optimizes the topology using either Nearest Neighbor Interchange (NNI) rearrangements or sub tree pruning and regrafting (SPR) and is used inside \code{pratchet}. \code{random.addition} can be used to produce -starting trees. -} -\details{ +starting trees and is an option for the argument perturbation in +\code{pratchet}. + The "SPR" rearrangements are so far only available for the "fitch" method, "sankoff" only uses "NNI". The "fitch" algorithm only works correct for binary trees. From 37d76efbf2a93042e385fea61fe9ed5e01db0911 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 12 Jan 2024 16:44:11 +0100 Subject: [PATCH 085/216] add references --- vignettes/phangorn.bib | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/vignettes/phangorn.bib b/vignettes/phangorn.bib index 4a0fff4e..4cfd0aa6 100644 --- a/vignettes/phangorn.bib +++ b/vignettes/phangorn.bib @@ -1341,3 +1341,28 @@ @article{Ota2000 } +@article{Pupko2000, + author = {Pupko, Tal and Pe, Itsik and Shamir, Ron and Graur, Dan}, + title = "{A Fast Algorithm for Joint Reconstruction of Ancestral Amino Acid Sequences}", + journal = {Molecular Biology and Evolution}, + volume = {17}, + number = {6}, + pages = {890-896}, + year = {2000}, + month = {06}, + issn = {0737-4038}, + doi = {10.1093/oxfordjournals.molbev.a026369}, +} + + +@article{Koshi1996, + author = {Koshi, Jeffrey M. and Goldstein, Richard A.}, + title = "{Probabilistic reconstruction of ancestral protein sequences}", + journal = {Journal of Molecular Evolution}, + volume = {42}, + number = {2}, + pages = {313-320}, + year = {1996}, + doi = {10.1007/BF02198858}, +} + From 9d091b352d85bad0511710444bc3d5c7b0ee3c9a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 12 Jan 2024 16:45:03 +0100 Subject: [PATCH 086/216] small improvements --- vignettes/Ancestral.Rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/Ancestral.Rmd b/vignettes/Ancestral.Rmd index b00cc890..9b6831e6 100644 --- a/vignettes/Ancestral.Rmd +++ b/vignettes/Ancestral.Rmd @@ -74,9 +74,9 @@ title("ACCTRAN") # Likelihood reconstructions -_phangorn_ also offers the possibility to estimate ancestral states using a ML. +_phangorn_ also offers the possibility to estimate ancestral states using ML. The advantages of ML over parsimony is that the reconstruction accounts for different edge lengths. -So far only a marginal construction is implemented (see [@Yang2006]). +So far only a marginal construction is implemented (see [@Yang2006][@Koshi1996]) and no joint reconstruction [@Pupko2000]. ```{r fit_ML} fit <- pml(tree, primates) fit <- optim.pml(fit, model="F81", control = pml.control(trace=0)) @@ -108,7 +108,7 @@ title("Bayes") # Fitting for discrete comparative data Often have already a phylogeny and only want estimate the ancestral reconstruction for this tree. -This is a common problem in phylogentic comparative methods and we can use the function *ace* in the ape [@Paradis2018], *fitDiscrete* in the geiger [@Pennell2014] or *fitMK* in the phytools [@Revell2012] package. Here we want fit these models using *optim.pml*. +This is a common problem in phylogentic comparative methods and we can use the function *ace* in the ape [@Paradis2018], *fitDiscrete* in the geiger [@Pennell2014] or *fitMK* in the phytools [@Revell2012] package. Here we want to show how to fit these models using *optim.pml*. First we load a tree and create some data. ```{r read_geospiza_data} From b34fc4ac42fae215821a2666883dc67859c89541 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 15 Jan 2024 12:05:50 +0100 Subject: [PATCH 087/216] Fix phylip webpage --- R/phyDat.R | 2 +- R/read.phyDat.R | 2 +- man/read.aa.Rd | 2 +- man/read.phyDat.Rd | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/phyDat.R b/R/phyDat.R index 22b12325..64b6728e 100644 --- a/R/phyDat.R +++ b/R/phyDat.R @@ -510,7 +510,7 @@ constSitePattern <- function(n, names=NULL, type="DNA", levels=NULL){ #' #' Felsenstein, J. (1993) Phylip (Phylogeny Inference Package) version 3.5c. #' Department of Genetics, University of Washington. -#' \url{https://evolution.genetics.washington.edu/phylip/phylip.html} +#' \url{https://phylipweb.github.io/phylip/} #' @keywords IO #' @export read.aa read.aa <- function (file, format = "interleaved", skip = 0, nlines = 0, diff --git a/R/read.phyDat.R b/R/read.phyDat.R index 8865739e..bd11a28a 100644 --- a/R/read.phyDat.R +++ b/R/read.phyDat.R @@ -57,7 +57,7 @@ read.fasta.user <- function (file, skip = 0, nlines = 0, #' \url{https://www.ncbi.nlm.nih.gov/blast/fasta.shtml} Felsenstein, J. (1993) #' Phylip (Phylogeny Inference Package) version 3.5c. Department of Genetics, #' University of Washington. -#' \url{https://evolution.genetics.washington.edu/phylip/phylip.html} +#' \url{https://phylipweb.github.io/phylip/} #' @examples #' fdir <- system.file("extdata/trees", package = "phangorn") #' primates <- read.phyDat(file.path(fdir, "primates.dna"), diff --git a/man/read.aa.Rd b/man/read.aa.Rd index 347ceb22..2e391bde 100644 --- a/man/read.aa.Rd +++ b/man/read.aa.Rd @@ -41,7 +41,7 @@ names. Felsenstein, J. (1993) Phylip (Phylogeny Inference Package) version 3.5c. Department of Genetics, University of Washington. -\url{https://evolution.genetics.washington.edu/phylip/phylip.html} +\url{https://phylipweb.github.io/phylip/} } \seealso{ \code{\link[ape]{read.dna}}, \code{\link[ape]{read.GenBank}}, diff --git a/man/read.phyDat.Rd b/man/read.phyDat.Rd index e62c4325..5d838b1c 100644 --- a/man/read.phyDat.Rd +++ b/man/read.phyDat.Rd @@ -56,7 +56,7 @@ primates <- read.phyDat(file.path(fdir, "primates.dna"), \url{https://www.ncbi.nlm.nih.gov/blast/fasta.shtml} Felsenstein, J. (1993) Phylip (Phylogeny Inference Package) version 3.5c. Department of Genetics, University of Washington. -\url{https://evolution.genetics.washington.edu/phylip/phylip.html} +\url{https://phylipweb.github.io/phylip/} } \seealso{ \code{\link[ape]{read.dna}}, \code{\link[ape]{read.GenBank}}, From 819ace8afcd23ac6d53b871d94682ee708d489a3 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 15 Jan 2024 14:30:59 +0100 Subject: [PATCH 088/216] reorganise code --- R/upgma.R | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++ man/upgma.Rd | 39 ++++++++++++++++--- 2 files changed, 136 insertions(+), 6 deletions(-) create mode 100644 R/upgma.R diff --git a/R/upgma.R b/R/upgma.R new file mode 100644 index 00000000..b79d02c0 --- /dev/null +++ b/R/upgma.R @@ -0,0 +1,103 @@ +#' UPGMA, WPGMA and sUPGMA +#' +#' UPGMA and WPGMA clustering. UPGMA and WPGMA are a wrapper function around +#' \code{\link[stats]{hclust}} retuning a \code{phylo} object. UPGMA +#' additionally performs nearest neighbor interchange (NNI) tree rearrangements +#' to improve the phylogeny (Schliep et al. 2023). +#' \code{supgma} perform serial sampled UPGMA similar to Drummond and Rodrigo +#' (2000) however also performing NNI rearrangements. +#' +#' +#' @param D A distance matrix. +#' @param method The agglomeration method to be used. This should be (an +#' unambiguous abbreviation of) one of "ward", "single", "complete", "average", +#' "mcquitty", "median" or "centroid". The default is "average". +#' @param NNI logical whether make nearest neighbor rearrangements to improve the +#' tree. Currently only available for \code{method="average"}. +#' @param trace Show output during optimization (see details). +#' @param tip.dates A named vector of sampling times associated to the tips. +#' @param \dots Further arguments passed to or from other methods. +#' @return A phylogenetic tree of class \code{phylo}. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link{hclust}}, \code{\link{dist.hamming}}, \code{\link{NJ}}, +#' \code{\link{as.phylo}}, \code{\link{fastme}}, \code{\link{nnls.tree}}, +#' \code{\link{rtt}} +#' @references Sneath, P. H., & Sokal, R. R. (1973). \emph{Numerical taxonomy. +#' The principles and practice of numerical classification.} +#' +#' Sokal, R. R., & Michener, C. D. (1958). A statistical method for evaluating +#' systematic relationships. \emph{University of Kansas Scientific Bulletin}, +#' v. 38. +#' +#' Drummond, A., & Rodrigo, A. G. (2000). Reconstructing genealogies of serial +#' samples under the assumption of a molecular clock using serial-sample UPGMA. +#' \emph{Molecular Biology and Evolution}, \bold{17(12)}, 1807-1815. +#' @keywords cluster +#' @examples +#' +#' data(Laurasiatherian) +#' dm <- dist.ml(Laurasiatherian) +#' tree <- upgma(dm) +#' plot(tree) +#' +#' @rdname upgma +#' @export +upgma <- function(D, method = "average", NNI=FALSE, ...) { + DD <- as.dist(D) + hc <- hclust(DD, method = method) + result <- as.phylo(hc) +# if(NNI){ +# stopifnot(method=="average") +# result <- upgma_nni(DD, tree=result, ...) +# } + result <- reorder(result, "postorder") + result +} + + +#' @rdname upgma +#' @export +wpgma <- function(D, method = "mcquitty", ...) { + DD <- as.dist(D) + hc <- hclust(DD, method = method, ...) + result <- as.phylo(hc) + result <- reorder(result, "postorder") + result +} + + + +#' @rdname upgma +#' @export +supgma <- function(D, tip.dates, trace=0){ + tree <- fastme.ols(D) + tree <- checkLabels(tree, attr(D, "Labels")) + tree <- rtt(tree, tip.dates[tree$tip.label]) + tree <- nnls.tree(D, tree, method = "tipdated", + tip.dates=tip.dates[tree$tip.label]) + rate_0 <- attr(tree, "rate") + dm_td <- designTipDated(tree, tip.dates[tree$tip.label]) + dm_td <- dm_td[, ncol(dm_td)] + me_start <- sum(tree$edge.length* rate_0) + if(trace) cat("ME: ", me_start, "rate:", rate_0, "\n") + me_best <- me_start + me <- 0 + iter <- TRUE + swapi <- 1 + while(iter){ + D_tmp <- D - rate_0 * dm_td + tree_tmp <- upgma(D_tmp) + swapi <- attr(tree_tmp, "swap") + tree_tmp <- nnls.tree(D, tree_tmp, method = "tipdated", + tip.dates=tip.dates[tree$tip.label]) + rate_0 <- attr(tree_tmp, "rate") + me <- sum(tree_tmp$edge.length * rate_0) + if(trace) cat("ME: ", me, "rate:", rate_0, "\n") + if(me < me_best){ + tree <- tree_tmp + me_best <- me + } else iter <- FALSE + } + tree$tip.dates <- tip.dates[tree$tip.label] + tree +} diff --git a/man/upgma.Rd b/man/upgma.Rd index b03bdfaa..038e6dbe 100644 --- a/man/upgma.Rd +++ b/man/upgma.Rd @@ -1,13 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/distTree.R +% Please edit documentation in R/upgma.R \name{upgma} \alias{upgma} \alias{wpgma} -\title{UPGMA and WPGMA} +\alias{supgma} +\title{UPGMA, WPGMA and sUPGMA} \usage{ -upgma(D, method = "average", ...) +upgma(D, method = "average", NNI = FALSE, ...) wpgma(D, method = "mcquitty", ...) + +supgma(D, tip.dates, trace = 0) } \arguments{ \item{D}{A distance matrix.} @@ -16,14 +19,25 @@ wpgma(D, method = "mcquitty", ...) unambiguous abbreviation of) one of "ward", "single", "complete", "average", "mcquitty", "median" or "centroid". The default is "average".} +\item{NNI}{logical whether make nearest neighbor rearrangements to improve the +tree. Currently only available for \code{method="average"}.} + \item{\dots}{Further arguments passed to or from other methods.} + +\item{tip.dates}{A named vector of sampling times associated to the tips.} + +\item{trace}{Show output during optimization (see details).} } \value{ A phylogenetic tree of class \code{phylo}. } \description{ -UPGMA and WPGMA clustering. Just a wrapper function around -\code{\link[stats]{hclust}}. +UPGMA and WPGMA clustering. UPGMA and WPGMA are a wrapper function around +\code{\link[stats]{hclust}} retuning a \code{phylo} object. UPGMA +additionally performs nearest neighbor interchange (NNI) tree rearrangements +to improve the phylogeny (Schliep et al. 2023). +\code{supgma} perform serial sampled UPGMA similar to Drummond and Rodrigo +(2000) however also performing NNI rearrangements. } \examples{ @@ -32,10 +46,23 @@ dm <- dist.ml(Laurasiatherian) tree <- upgma(dm) plot(tree) +} +\references{ +Sneath, P. H., & Sokal, R. R. (1973). \emph{Numerical taxonomy. +The principles and practice of numerical classification.} + +Sokal, R. R., & Michener, C. D. (1958). A statistical method for evaluating +systematic relationships. \emph{University of Kansas Scientific Bulletin}, +v. 38. + +Drummond, A., & Rodrigo, A. G. (2000). Reconstructing genealogies of serial +samples under the assumption of a molecular clock using serial-sample UPGMA. +\emph{Molecular Biology and Evolution}, \bold{17(12)}, 1807-1815. } \seealso{ \code{\link{hclust}}, \code{\link{dist.hamming}}, \code{\link{NJ}}, -\code{\link{as.phylo}}, \code{\link{fastme}}, \code{\link{nnls.tree}} +\code{\link{as.phylo}}, \code{\link{fastme}}, \code{\link{nnls.tree}}, +\code{\link{rtt}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} From 8a6a8486e130c57aeedbe14c028c107ab8a3cdfa Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 15 Jan 2024 14:38:36 +0100 Subject: [PATCH 089/216] update --- NAMESPACE | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 99b7b174..60295801 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,9 @@ S3method(as.data.frame,phyDat) S3method(as.matrix,splits) S3method(as.networx,phylo) S3method(as.networx,splits) +S3method(as.phyDat,AAStringSet) +S3method(as.phyDat,AAbin) +S3method(as.phyDat,DNAStringSet) S3method(as.phyDat,DNAbin) S3method(as.phyDat,MultipleAlignment) S3method(as.phyDat,alignment) @@ -222,6 +225,7 @@ export(simSeq) export(splitsNetwork) export(sprdist) export(superTree) +export(supgma) export(threshStateC) export(tidy) export(transferBootstrap) @@ -290,8 +294,10 @@ importFrom(stats,AIC) importFrom(stats,BIC) importFrom(stats,aggregate) importFrom(stats,as.dist) +importFrom(stats,binomial) importFrom(stats,constrOptim) importFrom(stats,cophenetic) +importFrom(stats,dbinom) importFrom(stats,dgamma) importFrom(stats,ecdf) importFrom(stats,hclust) From 3dbe65e46942f5b5b564b58c371e33530b92cfb0 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 15 Jan 2024 15:50:50 +0100 Subject: [PATCH 090/216] try to avoid errors --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 421888da..8a01e567 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,6 +67,7 @@ Suggests: xtable LinkingTo: Rcpp +Remotes: github::KlausVigo/apex VignetteBuilder: knitr, utils From 95456b0f0e9953573e951d3a49976dc56f7eb70d Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 15 Jan 2024 17:20:08 +0100 Subject: [PATCH 091/216] try to avoid errors --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8a01e567..3985f35c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,7 +67,8 @@ Suggests: xtable LinkingTo: Rcpp -Remotes: github::KlausVigo/apex +Remotes: github::KlausVigo/apex, + github::EmmanuelParadis/ape VignetteBuilder: knitr, utils From 8d32c6c5676ebdb911b13c961167502f5fe494fa Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 15 Jan 2024 17:30:33 +0100 Subject: [PATCH 092/216] test --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3985f35c..1213886e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: phangorn Title: Phylogenetic Reconstruction and Analysis -Version: 3.0.0 +Version: 3.0.0.0 Authors@R: c(person("Klaus", "Schliep", role = c("aut", "cre"), email = "klaus.schliep@gmail.com", From 927a46151bcb551aeb38b6b6c5926683e6bec56f Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 15 Jan 2024 18:17:32 +0100 Subject: [PATCH 093/216] test if tests work now again --- R/phylo.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/phylo.R b/R/phylo.R index 3acfecb4..67863777 100644 --- a/R/phylo.R +++ b/R/phylo.R @@ -124,7 +124,7 @@ optimGamma <- function(tree, data, shape = 1, k = 4, ...) { fn <- function(shape, tree, data, k, ...) pml.fit(tree, data, shape = shape, k = k, ...) res <- optimize(f = fn, interval = c(0.1, 100), lower = 0.1, upper = 100, - maximum = TRUE, tol = .01, tree = tree, data = data, k = k, ...) + maximum = TRUE, tol = .001, tree = tree, data = data, k = k, ...) res } From ebd9d80c34b5f5d1866476de2d736e78ffd725e0 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 16 Jan 2024 17:48:20 +0100 Subject: [PATCH 094/216] add_edge_length assign now also edge length to an splits graph (networx) --- R/draw_CI.R | 18 ++++++++++++++---- man/add_edge_length.Rd | 3 ++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/R/draw_CI.R b/R/draw_CI.R index c4d78dff..a92dea8d 100644 --- a/R/draw_CI.R +++ b/R/draw_CI.R @@ -1,5 +1,6 @@ edge_length_matrix <- function(tree, trees, rooted=TRUE){ if(!inherits(trees, "multiPhylo")) stop("Trees must be of class multiPhylo!") + if(inherits(tree, "networx")) rooted <- FALSE trees <- .uncompressTipLabel(trees) |> .compressTipLabel(ref=tree$tip.label) if(!rooted){ trees <- unroot(trees) @@ -13,7 +14,9 @@ edge_length_matrix <- function(tree, trees, rooted=TRUE){ el[x$edge[,2]] <- x$edge.length el } - bp <- bip(tree) + if(inherits(tree, "networx")) bp <- tree$splits + else bp <- bip(tree) + if(!rooted) bp <- SHORTwise(bp) m <- length(bp) res <- matrix(NA_real_, length(trees), m) @@ -35,7 +38,8 @@ edge_length_matrix <- function(tree, trees, rooted=TRUE){ ##' @title Assign and compute edge lengths from a sample of trees ##' @description This command can infer some average edge lengths and assign ##' them from a (bootstrap/MCMC) sample. -##' @param tree tree where edge lengths are assigned to. +##' @param tree a phylogenetic tree or splitnetwork where edge lengths are +##' assigned to. ##' @param trees an object of class multiPhylo, where the average for the edges ##' is computed from. ##' @param fun a function to compute the average (default is median). @@ -63,8 +67,14 @@ add_edge_length <- function(tree, trees, fun=\(x)median(na.omit(x)), if(!rooted) tree <- unroot(tree) X <- edge_length_matrix(tree, trees, rooted) nh <- apply(X, 2, fun) - if(rooted) tree$edge.length <- nh[tree$edge[,1]] - nh[tree$edge[,2]] - else tree$edge.length <- nh[tree$edge[,2]] + if(inherits(tree, "networx")){ + tree$edge.length <- nh[tree$splitIndex] + attr(tree$splits, "weights") <- nh + } + else{ + if(rooted) tree$edge.length <- nh[tree$edge[,1]] - nh[tree$edge[,2]] + else tree$edge.length <- nh[tree$edge[,2]] + } tree } diff --git a/man/add_edge_length.Rd b/man/add_edge_length.Rd index 734c8874..337e6a90 100644 --- a/man/add_edge_length.Rd +++ b/man/add_edge_length.Rd @@ -8,7 +8,8 @@ add_edge_length(tree, trees, fun = function(x) median(na.omit(x)), rooted = all(is.rooted(trees))) } \arguments{ -\item{tree}{tree where edge lengths are assigned to.} +\item{tree}{a phylogenetic tree or splitnetwork where edge lengths are +assigned to.} \item{trees}{an object of class multiPhylo, where the average for the edges is computed from.} From 2f66600c0028b9fa6d39d421e39a016bdb8a0144 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 25 Jan 2024 13:48:54 +0100 Subject: [PATCH 095/216] fix bug reported by @amizeranschi --- R/phylo.R | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/R/phylo.R b/R/phylo.R index 67863777..30be873c 100644 --- a/R/phylo.R +++ b/R/phylo.R @@ -700,23 +700,23 @@ optEdgeMulti <- function(object, control = pml.control(epsilon = 1e-8, # add data for internal use parent.frame(n) for higher nestings -update.pmlNew <- function(object, ..., evaluate = TRUE) { - call <- object$call - if (is.null(call)) - stop("need an object with call component") - extras <- match.call(expand.dots = FALSE)$... - if (length(extras)) { - existing <- !is.na(match(names(extras), names(call))) - for (a in names(extras)[existing]) call[[a]] <- extras[[a]] - if (any(!existing)) { - call <- c(as.list(call), extras[!existing]) - call <- as.call(call) - } - } - if (evaluate) - eval(call, object, parent.frame()) - else call -} +# update.pmlNew <- function(object, ..., evaluate = TRUE) { +# call <- object$call +# if (is.null(call)) +# stop("need an object with call component") +# extras <- match.call(expand.dots = FALSE)$... +# if (length(extras)) { +# existing <- !is.na(match(names(extras), names(call))) +# for (a in names(extras)[existing]) call[[a]] <- extras[[a]] +# if (any(!existing)) { +# call <- c(as.list(call), extras[!existing]) +# call <- as.call(call) +# } +# } +# if (evaluate) +# eval(call, object, parent.frame()) +# else call +# } #' @export @@ -2064,6 +2064,8 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, orig.data <- data addTaxa <- TRUE tree <- drop.tip(tree, mapping[, 1]) + if(method=="unrooted") tree <- unroot(tree) + attr(tree, "order") <- NULL tree <- reorder(tree, "postorder") } } From 5c3aebaf6c38f319b2b7b8d9f2cc1cd99ef566d6 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 25 Jan 2024 14:26:25 +0100 Subject: [PATCH 096/216] don't depend on apex --- DESCRIPTION | 3 +-- R/read.nexus.partitions.R | 10 +++++----- man/read.nexus.partitions.Rd | 3 +-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1213886e..d2da01dc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -55,7 +55,6 @@ Imports: stats, utils Suggests: - apex, Biostrings, knitr, magick, @@ -76,5 +75,5 @@ biocViews: Software, Technology, QualityControl Encoding: UTF-8 Repository: CRAN Roxygen: list(old_usage = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 Language: en-US diff --git a/R/read.nexus.partitions.R b/R/read.nexus.partitions.R index 6ef8c70b..a43dfabc 100644 --- a/R/read.nexus.partitions.R +++ b/R/read.nexus.partitions.R @@ -42,8 +42,8 @@ read.nexus.charset <- function(file){ #' the data according to the charsets givb in the SETS block. #' #' @param file a file name. -#' @param return either return a list where eeach element is a 'phyDat' object -#' or an object of class 'multiphyDat' +#' @param return either returns a list where each element is a 'phyDat' object. +## or an object of class 'multiphyDat' #' @param \dots Further arguments passed to or from other methods. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{read.nexus.data}}, \code{\link{read.phyDat}} @@ -69,13 +69,13 @@ read.nexus.charset <- function(file){ #' @rdname read.nexus.partitions #' @export read.nexus.partitions <- function(file, return="list", ...){ - return <- match.arg(return, c("list", "multiphyDat")) +# return <- match.arg(return, c("list", "multiphyDat")) dat <- read.phyDat(file, format="nexus", ...) genes <- read.nexus.charset(file) if(is.null(genes)) stop(paste(file, "does not contain Charset!")) seq <- lapply(genes, \(x, dat)dat[,x], dat) names(seq) <- names(genes) - if(return=="multiphyDat" && requireNamespace("apex")) - seq <- new("multiphyDat", seq = seq, add.gaps = FALSE) +# if(return=="multiphyDat" && requireNamespace("apex")) +# seq <- new("multiphyDat", seq = seq, add.gaps = FALSE) seq } diff --git a/man/read.nexus.partitions.Rd b/man/read.nexus.partitions.Rd index 40bf5a4a..3891e38a 100644 --- a/man/read.nexus.partitions.Rd +++ b/man/read.nexus.partitions.Rd @@ -9,8 +9,7 @@ read.nexus.partitions(file, return = "list", ...) \arguments{ \item{file}{a file name.} -\item{return}{either return a list where eeach element is a 'phyDat' object -or an object of class 'multiphyDat'} +\item{return}{either returns a list where each element is a 'phyDat' object.} \item{\dots}{Further arguments passed to or from other methods.} } From 7a6c2d931149284dd3963c27b84be240571c5109 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 31 Jan 2024 13:52:35 +0100 Subject: [PATCH 097/216] remove duplicated code --- R/treedist.R | 136 ++++++++++++--------------------------------------- 1 file changed, 32 insertions(+), 104 deletions(-) diff --git a/R/treedist.R b/R/treedist.R index 341bd335..4097e4df 100644 --- a/R/treedist.R +++ b/R/treedist.R @@ -56,6 +56,22 @@ cophenetic.networx <- function(x) { } +# helper functions +fun1 <- function(x) { + w <- numeric(max(x$edge)) + w[x$edge[, 2]] <- x$edge.length + w +} + + +fun2 <- function(x, rooted=FALSE) { + bp <- bip(x) + if(!rooted) bp <- SHORTwise(bp) + bp <- sapply(bp, paste, collapse = "_") + bp +} + + ## @aliases treedist RF.dist wRF.dist KF.dist path.dist sprdist SPR.dist #' Distances between trees #' @@ -343,6 +359,7 @@ wRF0 <- function(tree1, tree2, normalize = FALSE, check.labels = TRUE, r2 <- is.rooted(tree2) if (r1 != r2) { message("one tree is unrooted, unrooted both") + rooted <- FALSE } if (!rooted) { if (r1) @@ -350,25 +367,14 @@ wRF0 <- function(tree1, tree2, normalize = FALSE, check.labels = TRUE, if (r2) tree2 <- unroot(tree2) } - if (!r1 | !r2) { - if (r1) - tree1 <- unroot(tree1) - if (r2) - tree2 <- unroot(tree2) - } if (!is.binary(tree1) | !is.binary(tree2)) message("Trees are not binary!") if (check.labels) tree2 <- checkLabels(tree2, tree1$tip.label) if (has.singles(tree1)) tree1 <- collapse.singles(tree1) if (has.singles(tree2)) tree2 <- collapse.singles(tree2) - bp1 <- bip(tree1) - bp2 <- bip(tree2) - if (!rooted) { - bp1 <- SHORTwise(bp1) - bp2 <- SHORTwise(bp2) - } - bp1 <- sapply(bp1, paste, collapse = "_") - bp2 <- sapply(bp2, paste, collapse = "_") + + bp1 <- fun2(tree1, rooted) + bp2 <- fun2(tree2, rooted) w1 <- numeric(max(tree1$edge)) w2 <- numeric(max(tree2$edge)) @@ -414,34 +420,13 @@ wRF2 <- function(tree, trees, normalize = FALSE, check.labels = TRUE, unclass(trees) nTips <- length(tree$tip.label) - - fun1 <- function(x) { - w <- numeric(max(x$edge)) - w[x$edge[, 2]] <- x$edge.length - w - } W <- lapply(trees, fun1) - fun2 <- function(x, nTips) { - bp <- bip(x) - bp <- SHORTwise(bp) - bp <- sapply(bp, paste, collapse = "_") - bp - } - fun3 <- function(x, nTips) { - bp <- bip(x) - bp <- sapply(bp, paste, collapse = "_") - bp - } - if (rooted) BP <- lapply(trees, fun3, nTips) - else BP <- lapply(trees, fun2, nTips) + BP <- lapply(trees, fun2, rooted) if (!rooted && is.rooted(tree)) tree <- unroot(tree) - bp <- bip(tree) - - if (!rooted) bp <- SHORTwise(bp) - bp <- sapply(bp, paste, collapse = "_") + bp <- fun2(tree, rooted) w <- numeric(max(tree$edge)) w[tree$edge[, 2]] <- tree$edge.length @@ -485,26 +470,12 @@ wRF1 <- function(trees, normalize = FALSE, check.labels = TRUE, unclass(trees) nTips <- length(trees[[1]]$tip.label) - fun1 <- function(x) { - w <- numeric(max(x$edge)) - w[x$edge[, 2]] <- x$edge.length - w - } + W <- lapply(trees, fun1) - fun2 <- function(x, nTips) { - bp <- bip(x) - bp <- SHORTwise(bp) - bp <- sapply(bp, paste, collapse = "_") - bp - } - fun3 <- function(x, nTips) { - bp <- bip(x) - bp <- sapply(bp, paste, collapse = "_") - bp - } + if (normalize) sc <- sapply(trees, function(x) sum(x$edge.length)) - if (rooted) BP <- lapply(trees, fun3, nTips) - else BP <- lapply(trees, fun2, nTips) + + BP <- lapply(trees, fun2, rooted) k <- 1 l <- length(trees) wRF <- numeric( (l * (l - 1)) / 2) @@ -732,15 +703,8 @@ kf0 <- function(tree1, tree2, check.labels = TRUE, rooted = FALSE) { } } - bp1 <- bip(tree1) - bp2 <- bip(tree2) - - if (!rooted) { - bp1 <- SHORTwise(bp1) - bp2 <- SHORTwise(bp2) - } - bp1 <- sapply(bp1, paste, collapse = "_") - bp2 <- sapply(bp2, paste, collapse = "_") + bp1 <- fun2(tree1, rooted) + bp2 <- fun2(tree1, rooted) w1 <- numeric(max(tree1$edge)) w2 <- numeric(max(tree2$edge)) @@ -782,31 +746,13 @@ kf1 <- function(tree, trees, check.labels = TRUE, rooted = FALSE) { nTips <- length(tree$tip.label) - fun1 <- function(x) { - w <- numeric(max(x$edge)) - w[x$edge[, 2]] <- x$edge.length - w - } W <- lapply(trees, fun1) - fun2 <- function(x, nTips) { - bp <- bip(x) - bp <- SHORTwise(bp) - bp <- sapply(bp, paste, collapse = "_") - bp - } - fun3 <- function(x, nTips) { - bp <- bip(x) - bp <- sapply(bp, paste, collapse = "_") - bp - } - if (rooted) BP <- lapply(trees, fun3, nTips) - else BP <- lapply(trees, fun2, nTips) + BP <- lapply(trees, fun2, rooted) if (!rooted && is.rooted(tree)) tree <- unroot(tree) - bp <- bip(tree) - if (!rooted) bp <- SHORTwise(bp) - bp <- sapply(bp, paste, collapse = "_") + + bp <- fun2(tree, rooted) w <- numeric(max(tree$edge)) w[tree$edge[, 2]] <- tree$edge.length @@ -843,27 +789,9 @@ kf2 <- function(trees, check.labels = TRUE, rooted = FALSE) { } unclass(trees) - fun1 <- function(x) { - w <- numeric(max(x$edge)) - w[x$edge[, 2]] <- x$edge.length - w - } W <- lapply(trees, fun1) - - fun2 <- function(x, nTips) { - bp <- bip(x) - bp <- SHORTwise(bp) - bp <- sapply(bp, paste, collapse = "_") - bp - } - fun3 <- function(x, nTips) { - bp <- bip(x) - bp <- sapply(bp, paste, collapse = "_") - bp - } - if (rooted) BP <- lapply(trees, fun3, nTips) - else BP <- lapply(trees, fun2, nTips) + BP <- lapply(trees, fun2, rooted) k <- 1 l <- length(trees) From 1cd2373eacc11e6cbf9a1fdb2c4062b928bb049a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 31 Jan 2024 16:19:42 +0100 Subject: [PATCH 098/216] speed up example --- R/superTree.R | 4 ++-- man/superTree.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/superTree.R b/R/superTree.R index 03ffde53..ef7da707 100644 --- a/R/superTree.R +++ b/R/superTree.R @@ -147,9 +147,9 @@ dist.superTree <- function(tree, trace = 0, fun, start = NULL, #' bs <- bootstrap.phyDat(Laurasiatherian, #' FUN = function(x) upgma(dist.hamming(x)), bs=50) #' -#' mrp_st <- superTree(bs) +#' mrp_st <- superTree(bs, minit = 25, maxit=50) #' plot(mrp_st) -#' \dontrun{ +#' \donttest{ #' rf_st <- superTree(bs, method = "RF") #' spr_st <- superTree(bs, method = "SPR") #' } diff --git a/man/superTree.Rd b/man/superTree.Rd index d9712024..38325c63 100644 --- a/man/superTree.Rd +++ b/man/superTree.Rd @@ -48,9 +48,9 @@ set.seed(1) bs <- bootstrap.phyDat(Laurasiatherian, FUN = function(x) upgma(dist.hamming(x)), bs=50) -mrp_st <- superTree(bs) +mrp_st <- superTree(bs, minit = 25, maxit=50) plot(mrp_st) -\dontrun{ +\donttest{ rf_st <- superTree(bs, method = "RF") spr_st <- superTree(bs, method = "SPR") } From 66872776a858b143275ef4644298652a14f7d028 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 31 Jan 2024 16:56:49 +0100 Subject: [PATCH 099/216] bugfix --- R/treedist.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/treedist.R b/R/treedist.R index 4097e4df..c394fd92 100644 --- a/R/treedist.R +++ b/R/treedist.R @@ -704,7 +704,7 @@ kf0 <- function(tree1, tree2, check.labels = TRUE, rooted = FALSE) { } bp1 <- fun2(tree1, rooted) - bp2 <- fun2(tree1, rooted) + bp2 <- fun2(tree2, rooted) w1 <- numeric(max(tree1$edge)) w2 <- numeric(max(tree2$edge)) From 312b1278ddda09c94b6d98489864354013f1421e Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 7 Feb 2024 16:01:20 +0100 Subject: [PATCH 100/216] apex is back on CRAN --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d2da01dc..4f7d8d89 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,6 +41,7 @@ Depends: ape (>= 5.7), R (>= 4.1.0) Imports: + apex, digest, fastmatch, generics, @@ -66,8 +67,7 @@ Suggests: xtable LinkingTo: Rcpp -Remotes: github::KlausVigo/apex, - github::EmmanuelParadis/ape +Remotes: github::EmmanuelParadis/ape VignetteBuilder: knitr, utils @@ -75,5 +75,5 @@ biocViews: Software, Technology, QualityControl Encoding: UTF-8 Repository: CRAN Roxygen: list(old_usage = TRUE) -RoxygenNote: 7.3.0 +RoxygenNote: 7.3.1 Language: en-US From d821bd6cfbf708b9d4e0150be35a82600d9fa9ba Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 7 Feb 2024 17:53:08 +0100 Subject: [PATCH 101/216] cleaning up --- vignettes/Ancestral.Rmd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vignettes/Ancestral.Rmd b/vignettes/Ancestral.Rmd index 9b6831e6..c965cff0 100644 --- a/vignettes/Ancestral.Rmd +++ b/vignettes/Ancestral.Rmd @@ -18,21 +18,22 @@ vignette: | knitr::opts_chunk$set(fig.width=6, fig.height=4) #, global.par=TRUE options(digits = 4) +suppressPackageStartupMessages(library(phangorn)) ``` - # Introduction These notes describe the ancestral sequence reconstruction using the _phangorn_ package [@Schliep2011]. _phangorn_ provides several methods to estimate ancestral character states with either Maximum Parsimony (MP) or Maximum Likelihood (ML). For more background on all the methods see e.g. [@Felsenstein2004] or [@Yang2006]. # Parsimony reconstructions + To reconstruct ancestral sequences we first load some data and reconstruct a tree: ```{r parsimony} library(phangorn) fdir <- system.file("extdata/trees", package = "phangorn") primates <- read.phyDat(file.path(fdir, "primates.dna"), format = "interleaved") -tree <- pratchet(primates, trace=0) |> acctran(primates) +tree <- pratchet(primates, trace=0) |> acctran(primates) |> makeNodeLabel() parsimony(tree, primates) ``` From 54f6361b6289b88ec6c6d298d85e7f2c848d9f58 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 7 Feb 2024 20:52:59 +0100 Subject: [PATCH 102/216] bug fix --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4f7d8d89..cbee4437 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,7 +41,6 @@ Depends: ape (>= 5.7), R (>= 4.1.0) Imports: - apex, digest, fastmatch, generics, @@ -56,6 +55,7 @@ Imports: stats, utils Suggests: + apex, Biostrings, knitr, magick, From 636b78c0efb03e40024790438b18edbbd99b7330 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 8 Feb 2024 15:19:22 +0100 Subject: [PATCH 103/216] add apex support --- R/read.nexus.partitions.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/read.nexus.partitions.R b/R/read.nexus.partitions.R index a43dfabc..6539ff53 100644 --- a/R/read.nexus.partitions.R +++ b/R/read.nexus.partitions.R @@ -69,13 +69,13 @@ read.nexus.charset <- function(file){ #' @rdname read.nexus.partitions #' @export read.nexus.partitions <- function(file, return="list", ...){ -# return <- match.arg(return, c("list", "multiphyDat")) + return <- match.arg(return, c("list", "multiphyDat")) dat <- read.phyDat(file, format="nexus", ...) genes <- read.nexus.charset(file) if(is.null(genes)) stop(paste(file, "does not contain Charset!")) seq <- lapply(genes, \(x, dat)dat[,x], dat) names(seq) <- names(genes) -# if(return=="multiphyDat" && requireNamespace("apex")) -# seq <- new("multiphyDat", seq = seq, add.gaps = FALSE) + if(return=="multiphyDat" && requireNamespace("apex")) + seq <- new("multiphyDat", seq = seq, add.gaps = FALSE) seq } From 680329fe8f91ebbc1b403a0552e76c9d1d1cec32 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 9 Feb 2024 09:05:55 +0100 Subject: [PATCH 104/216] new tests --- inst/tinytest/test_phyDat.R | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/inst/tinytest/test_phyDat.R b/inst/tinytest/test_phyDat.R index b178c1fc..290e93ca 100644 --- a/inst/tinytest/test_phyDat.R +++ b/inst/tinytest/test_phyDat.R @@ -4,7 +4,7 @@ data(chloroplast) set.seed(42) tree <- rtree(10) codon_align <- simSeq(tree, l=100, type = "CODON") - +user_align <- simSeq(tree, l=100, type = "USER", levels=c(0:9,"-")) phy_matrix <- as.character(Laurasiatherian) phy_df <- as.data.frame(Laurasiatherian) @@ -87,14 +87,39 @@ unlink("tmp1.nex") write.phyDat(chloroplast, "tmp2.txt") expect_true(inherits(chloro <- read.phyDat("tmp2.txt", type="AA"), "phyDat")) -expect_equal(chloro, chloroplast) # changed to toupper +expect_equal(chloro, chloroplast) unlink("tmp2.txt") + write.phyDat(chloroplast, "tmp.fas", format="fasta") expect_true(inherits(chloro_fas <- read.phyDat("tmp.fas", type="AA", format = "fasta"), "phyDat")) -expect_equal(chloro_fas, chloroplast) # changed to toupper +expect_equal(chloro_fas, chloroplast) unlink("tmp.fas") +write.phyDat(chloroplast, "tmp2.nex", format="nexus") +expect_true(inherits(chloro_nex <- read.phyDat("tmp2.nex", type="AA", + format = "nexus"), "phyDat")) +expect_equal(chloro_nex, chloroplast) +unlink("tmp2.nex") + +write.phyDat(user_align, "tmp3.txt") +expect_true(inherits(user2 <- read.phyDat("tmp3.txt", type="USER", + levels=c(0:9,"-"), ambiguity="?"), "phyDat")) +expect_equal(user2, user_align) +unlink("tmp3.txt") + +write.phyDat(user_align, "tmp3.fas", format="fasta") +expect_true(inherits(user_fas <- read.phyDat("tmp3.fas", type="USER", + levels=c(0:9,"-"), ambiguity="?", format = "fasta"), "phyDat")) +expect_equal(user_fas, user_align) +unlink("tmp3.fas") + +write.phyDat(user_align, "tmp3.nex", format="nexus") +expect_true(inherits(user_nex <- read.phyDat("tmp3.nex", type="STANDARD", + format = "nexus"), "phyDat")) +expect_equal(user_nex, user_align, check.attributes = FALSE) +unlink("tmp3.nex") + # test removing duplicated sequences tmp <- as.character(Laurasiatherian) From f142b40948213c3ec3c5ff23da1d0d23bd8f8e72 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 14 Feb 2024 14:25:35 +0100 Subject: [PATCH 105/216] clean up --- R/candidate_tree.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/R/candidate_tree.R b/R/candidate_tree.R index 2db4a653..754725c3 100644 --- a/R/candidate_tree.R +++ b/R/candidate_tree.R @@ -46,11 +46,6 @@ candidate_tree <- function(x, method=c("unrooted", "ultrametric", "tipdated"), if(is.null(names(tip.dates))) names(tip.dates) <- names(x) dm <- dist.ml(x, ...) tree <- supgma(dm, tip.dates) -# tree <- fastme.ols(dm) -# tree <- rtt(tree, tip.dates[tree$tip.label]) -# tree <- nnls.tree(dm, tree, method = "tipdated", -# tip.dates=tip.dates[tree$tip.label]) -# tree$tip.dates <- tip.dates[tree$tip.label] } minEdge(tree, tau=eps, enforce_ultrametric=enforce_ultrametric) } From 0ef34aa5589275463861cca441108bb1f3ec9014 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 14 Feb 2024 14:28:03 +0100 Subject: [PATCH 106/216] improve error messages --- R/codon.R | 6 +++--- R/draw_CI.R | 2 +- R/fitch64.R | 2 +- R/hadamard.R | 2 +- R/phyDat.R | 8 ++++---- R/plotBS.R | 2 +- R/pmlPart.R | 2 +- R/pml_bb.R | 2 +- R/splits.R | 7 +++---- R/superTree.R | 2 +- R/treeRearrangement.R | 2 +- R/treedist.R | 27 ++++++++------------------- 12 files changed, 26 insertions(+), 38 deletions(-) diff --git a/R/codon.R b/R/codon.R index 2495c384..12a011c3 100644 --- a/R/codon.R +++ b/R/codon.R @@ -57,8 +57,8 @@ #' @rdname dna2codon #' @export dna2codon <- function(x, codonstart=1, code=1, ambiguity="---", ...){ - if(!inherits(x, "phyDat"))stop("x needs to be of class phyDat!") - if(attr(x, "type")=="AA")stop("x needs to be a nucleotide sequence!") + if(!inherits(x, "phyDat"))stop("x must be of class phyDat") + if(attr(x, "type")=="AA")stop("x must be a nucleotide sequence!") if(codonstart>1){ del <- -seq_len(codonstart) @@ -76,7 +76,7 @@ dna2codon <- function(x, codonstart=1, code=1, ambiguity="---", ...){ #' @rdname dna2codon #' @export codon2dna <- function(x){ - if(!inherits(x, "phyDat"))stop("x needs to be of class phyDat!") + if(!inherits(x, "phyDat"))stop("x must be of class phyDat") phyDat.DNA(as.character(x)) } diff --git a/R/draw_CI.R b/R/draw_CI.R index a92dea8d..ef1a71fb 100644 --- a/R/draw_CI.R +++ b/R/draw_CI.R @@ -1,5 +1,5 @@ edge_length_matrix <- function(tree, trees, rooted=TRUE){ - if(!inherits(trees, "multiPhylo")) stop("Trees must be of class multiPhylo!") + if(!inherits(trees, "multiPhylo")) stop("trees must be of class multiPhylo") if(inherits(tree, "networx")) rooted <- FALSE trees <- .uncompressTipLabel(trees) |> .compressTipLabel(ref=tree$tip.label) if(!rooted){ diff --git a/R/fitch64.R b/R/fitch64.R index 887b3447..95604abb 100644 --- a/R/fitch64.R +++ b/R/fitch64.R @@ -261,7 +261,7 @@ optim.fitch <- function(tree, data, trace = 1, rearrangements = "NNI", ...) { } if (is.null(attr(tree, "order")) || attr(tree, "order") != "postorder") tree <- reorder(tree, "postorder") - if (class(data)[1] != "phyDat") stop("data must be of class phyDat") + if (!inherits(data, "phyDat")) stop("data must be of class phyDat") rt <- FALSE diff --git a/R/hadamard.R b/R/hadamard.R index 1be7830c..68efc9b1 100644 --- a/R/hadamard.R +++ b/R/hadamard.R @@ -209,7 +209,7 @@ distanceHadamard <- function(dm, eps = 0.001) { #' @rdname hadamard #' @export h4st <- function(obj, levels = c("a", "c", "g", "t")) { - if (!inherits(obj, "phyDat")) stop("obj needs to be of class phyDat!") + if (!inherits(obj, "phyDat")) stop("obj must be of class phyDat") if (attr(obj, "nc") != 4L) stop("Error") obj <- removeAmbiguousSites(obj) obj <- as.data.frame(t(as.character(obj))) diff --git a/R/phyDat.R b/R/phyDat.R index 64b6728e..da154fe5 100644 --- a/R/phyDat.R +++ b/R/phyDat.R @@ -98,7 +98,7 @@ cbind.phyDat <- function(..., gaps="-", compress=TRUE){ if (n == 1) return(x[[1]]) types <- sapply(x, function(x)attr(x, "type")) -# if(length(unique(types))>1) stop("All alignments need to have the same type!") + if(any(types!=types[1]))stop("Alignments must have same type!") nr <- numeric(n) ATTR <- attributes(x[[1]]) nr[1] <- sum(attr(x[[1]], "weight")) @@ -540,10 +540,10 @@ read.aa <- function (file, format = "interleaved", skip = 0, nlines = 0, oop <- options(warn = -1) fl.num <- as.numeric(unlist(strsplit(gsub("^ +", "", fl), " +"))) options(oop) - if (all(is.na(fl.num))) + if (all(is.na(fl.num)) || length(fl.num) != 2) stop("the first line of the file must contain the dimensions of the data") - if (length(fl.num) != 2) - stop("the first line of the file must contain TWO numbers") +# if (length(fl.num) != 2) +# stop("the first line of the file must contain the dimensions of the data") else { n <- fl.num[1] s <- fl.num[2] diff --git a/R/plotBS.R b/R/plotBS.R index c3571960..5588f297 100644 --- a/R/plotBS.R +++ b/R/plotBS.R @@ -109,7 +109,7 @@ plotBS <- function(tree, trees, type = "phylogram", method="FBP", type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", "radial", "none")) if(inherits(tree, "pml")) tree <- tree$tree - if(!inherits(tree, "phylo")) stop("tree must be of class phylo!") + if(!inherits(tree, "phylo")) stop("tree must be of class phylo") # method <- match.arg(method, c("FBP", "TBE", "MCC"), several.ok=TRUE) # wird in support gecheckt if (hasArg(trees)) { diff --git a/R/pmlPart.R b/R/pmlPart.R index 3d2893e2..b1350405 100644 --- a/R/pmlPart.R +++ b/R/pmlPart.R @@ -251,7 +251,7 @@ makePart <- function(fit, rooted, weight = ~index + genes) { multiphyDat2pmlPart <- function(x, method="unrooted", tip.dates=NULL, ...) { if(is.list(x) && all(sapply(x,inherits, "phyDat"))) seq <- x else if(inherits(x, "multiphyDat")) seq <- x@seq - else stop("x must be of class 'multiphyDat' or a list of 'phyDat' objects!") + else stop("x must be of class 'multiphyDat' or a list of 'phyDat' objects") shared_tree <- TRUE if (shared_tree) { concatenate_x <- do.call(cbind.phyDat, seq) diff --git a/R/pml_bb.R b/R/pml_bb.R index 2b7826a0..9e2d1a7a 100644 --- a/R/pml_bb.R +++ b/R/pml_bb.R @@ -127,7 +127,7 @@ split_model <- function(x="GTR + G(4) + I", type="DNA"){ tmp <- match(m, mods) if(all(is.na(tmp))) stop("Could not find model!") else pos <- tmp[!is.na(tmp)] - if(length(pos)>1) stop("Error, fould several models!") + if(length(pos)>1) stop("Error, found several models!") model <- mods[pos] m <- m[is.na(tmp)] diff --git a/R/splits.R b/R/splits.R index 0cc2a51a..27b83985 100644 --- a/R/splits.R +++ b/R/splits.R @@ -433,7 +433,7 @@ as.splits.bitsplits <- function(x, ...){ #' @export compatible <- function(obj1, obj2 = NULL) { if (!inherits(obj1, "splits")) - stop("obj needs to be of class splits") + stop("obj must be of class splits") labels <- attr(obj1, "labels") l <- length(labels) n <- length(obj1) @@ -469,11 +469,10 @@ compatible <- function(obj1, obj2 = NULL) { # in clanistic.R ?? compatible3 <- function(x, y = NULL) { if (!inherits(x, "splits")) - stop("x needs to be of class splits") + stop("x must be of class splits") if (is.null(y)) y <- x - if (!inherits(y, "splits")) - stop("y needs to be of class splits") + stop("y must be of class splits") xlabels <- attr(x, "labels") ylabels <- attr(y, "labels") if (identical(xlabels, ylabels)) labels <- xlabels diff --git a/R/superTree.R b/R/superTree.R index ef7da707..95c8c14f 100644 --- a/R/superTree.R +++ b/R/superTree.R @@ -1,7 +1,7 @@ tree2phyDat <- function(trees) { # some minor error checking if (!inherits(trees, "multiPhylo")) - stop("trees must be object of class 'multiPhylo.'") + stop("trees must be of class multiPhylo") labels <- lapply(trees, function(x) sort(x$tip.label)) ulabels <- unique(labels) diff --git a/R/treeRearrangement.R b/R/treeRearrangement.R index 5f089338..b783dd2d 100644 --- a/R/treeRearrangement.R +++ b/R/treeRearrangement.R @@ -212,7 +212,7 @@ kSPR <- function(tree, k = NULL) { oneOf4 <- function(tree, ind1, ind2, from = 1, to = 1, root) { if (!is.binary(tree)) - stop("Sorry, trees must be binary!") + stop("trees must be binary") tree <- reroot(tree, ind2, FALSE) kids1 <- Children(tree, ind1) anc <- Ancestors(tree, ind1, "all") diff --git a/R/treedist.R b/R/treedist.R index c394fd92..462047e2 100644 --- a/R/treedist.R +++ b/R/treedist.R @@ -172,7 +172,8 @@ treedist <- function(tree1, tree2, check.labels = TRUE) { branch.score.difference <- NULL path.difference <- NULL quadratic.path.difference <- NULL - if (!is.binary(tree1) | !is.binary(tree2)) message("Trees are not binary!") + if (!is.binary(tree1) | !is.binary(tree2)) + message("Some trees are not binary. Result may not what you expect!") bp1 <- bip(tree1) bp2 <- bip(tree2) @@ -248,7 +249,8 @@ sprdist <- function(tree1, tree2) { # side of splits) tree1 <- reorder(tree1, "postorder") tree2 <- reorder(tree2, "postorder") - if (!is.binary(tree1) | !is.binary(tree2)) message("Trees are not binary!") + if (!is.binary(tree1) | !is.binary(tree2)) + message("Some trees are not binary. Result may not what you expect!") # possibly replace bip with bipart bp1 <- bip(tree1) bp1 <- SHORTwise(bp1) @@ -368,7 +370,7 @@ wRF0 <- function(tree1, tree2, normalize = FALSE, check.labels = TRUE, tree2 <- unroot(tree2) } if (!is.binary(tree1) | !is.binary(tree2)) - message("Trees are not binary!") + message("Some trees are not binary. Result may not what you expect!") if (check.labels) tree2 <- checkLabels(tree2, tree1$tip.label) if (has.singles(tree1)) tree1 <- collapse.singles(tree1) if (has.singles(tree2)) tree2 <- collapse.singles(tree2) @@ -505,21 +507,9 @@ wRF1 <- function(trees, normalize = FALSE, check.labels = TRUE, mRF2 <- function(tree, trees, normalize = FALSE, check.labels = TRUE, rooted = FALSE) { - if (!inherits(trees, "multiPhylo")) - stop("Argument trees should be an object of class \"multiPhylo\"") - if (!inherits(tree, "phylo")) - stop("Argument tree should be an object of class \"phylo\"") trees <- .compressTipLabel(trees) tipLabel <- attr(trees, "TipLabel") if (check.labels) tree <- checkLabels(tree, tipLabel) -# if (check.labels) { -# ind <- match(tipLabel, tree$tip.label) -# if (any(is.na(ind)) | length(tipLabel) != length(tree$tip.label)) -# stop("trees have different labels") -# tree$tip.label <- tree$tip.label[ind] -# ind2 <- match(seq_along(ind), tree$edge[, 2]) -# tree$edge[ind2, 2] <- order(ind) -# } nTips <- length(tipLabel) l <- length(trees) RF <- numeric(l) @@ -563,8 +553,6 @@ mRF2 <- function(tree, trees, normalize = FALSE, check.labels = TRUE, mRF <- function(trees, normalize = FALSE, rooted = FALSE) { - if (!inherits(trees, "multiPhylo")) - stop("Argument trees should be an object of class \"multiPhylo\"") trees <- .compressTipLabel(trees) tipLabel <- attr(trees, "TipLabel") nTips <- length(tipLabel) @@ -572,7 +560,7 @@ mRF <- function(trees, normalize = FALSE, rooted = FALSE) { RF <- numeric( (l * (l - 1)) / 2) if (rooted && any(!is.rooted(trees))) { - warning("Some trees were rooted, unrooted all") + warning("some trees were rooted, unrooted all") rooted <- FALSE } if (!rooted) { @@ -637,7 +625,8 @@ RF0 <- function(tree1, tree2 = NULL, normalize = FALSE, check.labels = TRUE, } } if (check.labels) tree2 <- checkLabels(tree2, tree1$tip.label) - if (!is.binary(tree1) | !is.binary(tree2)) message("Trees are not binary!") + if (!is.binary(tree1) | !is.binary(tree2)) + message("Some trees are not binary. Result may not what you expect!") bp1 <- bipart(tree1) bp2 <- bipart(tree2) nTips <- length(tree1$tip.label) From 717f47d89525dae3350ac42ab5f6eb5bb73ab90a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 14 Feb 2024 14:38:15 +0100 Subject: [PATCH 107/216] small API change --- R/transferBootstrap.R | 24 ++++++++++-------------- R/treeManipulation.R | 4 ++-- man/midpoint.Rd | 6 +++--- man/transferBootstrap.Rd | 10 ++-------- 4 files changed, 17 insertions(+), 27 deletions(-) diff --git a/R/transferBootstrap.R b/R/transferBootstrap.R index d83ceb0e..b6d0620f 100644 --- a/R/transferBootstrap.R +++ b/R/transferBootstrap.R @@ -5,14 +5,10 @@ #' values to the (internal) edges. #' #' @param tree The tree on which edges the bootstrap values are plotted. -#' @param BStrees a list of trees (object of class "multiPhylo"). +#' @param trees a list of trees (object of class "multiPhylo"). #' @param phylo Logical, return a phylogentic tree with support value or a #' vector of bootstrap values. #' @param scale scale the values. -#' @return \code{plotBS} returns silently a tree, i.e. an object of class -#' \code{phylo} with the bootstrap values as node labels. The argument -#' \code{BSTrees} is optional and if not supplied the labels supplied -#' in the \code{node.label} slot will be used. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{plotBS}}, \code{\link{maxCladeCred}}, #' \code{\link{drawSupportOnEdges}} @@ -33,19 +29,19 @@ #' # same as #' plotBS(raxml.tree, raxml.bootstrap, "p", "TBE") #' @export -transferBootstrap <- function(tree, BStrees, phylo=TRUE, scale=TRUE){ - if(!inherits(BStrees, "multiPhylo")) - stop("BSTrees needs to be of class multiPhylo!") - BStrees <- .uncompressTipLabel(BStrees) - BStrees <- .compressTipLabel(BStrees, tree$tip.label) - BStrees <- reorder(BStrees, "postorder") +transferBootstrap <- function(tree, trees, phylo=TRUE, scale=TRUE){ + if(!inherits(trees, "multiPhylo")) + stop("trees must be of class multiPhylo") + trees <- .uncompressTipLabel(trees) + trees <- .compressTipLabel(trees, tree$tip.label) + trees <- reorder(trees, "postorder") l <- Ntip(tree) bp <- prop.part(tree) bp <- SHORTwise(bp)[-1] not_cherry <- lengths(bp) != 2 res <- numeric(length(bp)) - for(i in seq_along(BStrees)){ - tmp <- BStrees[[i]] + for(i in seq_along(trees)){ + tmp <- trees[[i]] bptmp <- prop.part(tmp) bptmp <- SHORTwise(bptmp)[-1] ind <- fmatch(bp, bptmp) @@ -54,7 +50,7 @@ transferBootstrap <- function(tree, BStrees, phylo=TRUE, scale=TRUE){ ind <- which(is.na(ind) & not_cherry) for(j in ind) res[j] <- res[j] + Transfer_Index(bp[[j]], tmp$edge, l) } - res <- res / length(BStrees) + res <- res / length(trees) if(! scale) res <- res * 100 res <- c(NA_real_, res) if(!phylo) return(res) diff --git a/R/treeManipulation.R b/R/treeManipulation.R index a2111c56..62f09eb0 100644 --- a/R/treeManipulation.R +++ b/R/treeManipulation.R @@ -75,7 +75,7 @@ changeEdgeLength <- function(tree, edge, edge.length) { #' \code{midpoint} by default assumes that node labels contain support values. #' This works if support values are computed from splits, but should be #' recomputed for clades. -#' \code{keep.as.tip} takes a list of tips and/or node labels and returns a tree +#' \code{keep_as_tip} takes a list of tips and/or node labels and returns a tree #' pruned to those. If node label, then it prunes all descendants of that node #' until that internal node becomes a tip. @@ -865,7 +865,7 @@ relabel <- function(y, ref) { #' @rdname midpoint #' @param labels tip and node labels to keep as tip labels in the tree #' @export -keep.as.tip<- function(tree, labels){ +keep_as_tip<- function(tree, labels){ nodes_to_keep <- labels[!is.na(match(labels, tree$node.label))] tips_to_remove <- Descendants(tree, nodes_to_keep) |> unlist() |> unique() tree_1 <- drop.tip(tree, tips_to_remove, subtree = TRUE) diff --git a/man/midpoint.Rd b/man/midpoint.Rd index 762fc918..5225c499 100644 --- a/man/midpoint.Rd +++ b/man/midpoint.Rd @@ -6,7 +6,7 @@ \alias{midpoint.phylo} \alias{midpoint.multiPhylo} \alias{pruneTree} -\alias{keep.as.tip} +\alias{keep_as_tip} \title{Tree manipulation} \usage{ getRoot(tree) @@ -19,7 +19,7 @@ midpoint(tree, node.labels = "support", ...) pruneTree(tree, ..., FUN = ">=") -keep.as.tip(tree, labels) +keep_as_tip(tree, labels) } \arguments{ \item{tree}{an object of class \code{phylo}.} @@ -47,7 +47,7 @@ as.numeric(as.character(tree$node.labels)) to convert them. \code{midpoint} by default assumes that node labels contain support values. This works if support values are computed from splits, but should be recomputed for clades. -\code{keep.as.tip} takes a list of tips and/or node labels and returns a tree +\code{keep_as_tip} takes a list of tips and/or node labels and returns a tree pruned to those. If node label, then it prunes all descendants of that node until that internal node becomes a tip. } diff --git a/man/transferBootstrap.Rd b/man/transferBootstrap.Rd index d54eed1c..32eefe94 100644 --- a/man/transferBootstrap.Rd +++ b/man/transferBootstrap.Rd @@ -4,24 +4,18 @@ \alias{transferBootstrap} \title{Transfer Bootstrap} \usage{ -transferBootstrap(tree, BStrees, phylo = TRUE, scale = TRUE) +transferBootstrap(tree, trees, phylo = TRUE, scale = TRUE) } \arguments{ \item{tree}{The tree on which edges the bootstrap values are plotted.} -\item{BStrees}{a list of trees (object of class "multiPhylo").} +\item{trees}{a list of trees (object of class "multiPhylo").} \item{phylo}{Logical, return a phylogentic tree with support value or a vector of bootstrap values.} \item{scale}{scale the values.} } -\value{ -\code{plotBS} returns silently a tree, i.e. an object of class -\code{phylo} with the bootstrap values as node labels. The argument -\code{BSTrees} is optional and if not supplied the labels supplied -in the \code{node.label} slot will be used. -} \description{ \code{transferBootstrap} assigns transfer bootstrap (Lemoine et al. 2018) values to the (internal) edges. From 88fa224b5153b5194b6b3f677af7170a7f867853 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 14 Feb 2024 16:52:25 +0100 Subject: [PATCH 108/216] bugfix --- NAMESPACE | 4 ++-- R/pmlPart.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 60295801..09225936 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -79,7 +79,6 @@ S3method(unique,dist) S3method(unique,phyDat) S3method(unique,splits) S3method(update,pml) -S3method(vcov,pml) export(AICc) export(Ancestors) export(CI) @@ -161,7 +160,7 @@ export(h2st) export(h4st) export(hadamard) export(hash) -export(keep.as.tip) +export(keep_as_tip) export(ldfactorial) export(lento) export(lli) @@ -231,6 +230,7 @@ export(tidy) export(transferBootstrap) export(treedist) export(upgma) +export(vcov.pml) export(wRF.dist) export(wpgma) export(write.nexus.dist) diff --git a/R/pmlPart.R b/R/pmlPart.R index b1350405..1d59c3a9 100644 --- a/R/pmlPart.R +++ b/R/pmlPart.R @@ -251,7 +251,7 @@ makePart <- function(fit, rooted, weight = ~index + genes) { multiphyDat2pmlPart <- function(x, method="unrooted", tip.dates=NULL, ...) { if(is.list(x) && all(sapply(x,inherits, "phyDat"))) seq <- x else if(inherits(x, "multiphyDat")) seq <- x@seq - else stop("x must be of class 'multiphyDat' or a list of 'phyDat' objects") + else stop("x must be of class multiphyDat or a list of phyDat objects") shared_tree <- TRUE if (shared_tree) { concatenate_x <- do.call(cbind.phyDat, seq) From 5e53e169313cc5db3e4143640d7112a88f5390e0 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 14 Feb 2024 17:24:58 +0100 Subject: [PATCH 109/216] bugfix --- NAMESPACE | 2 +- R/pml_generics.R | 1 + man/pml.Rd | 5 ++++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 09225936..642acc15 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -79,6 +79,7 @@ S3method(unique,dist) S3method(unique,phyDat) S3method(unique,splits) S3method(update,pml) +S3method(vcov,pml) export(AICc) export(Ancestors) export(CI) @@ -230,7 +231,6 @@ export(tidy) export(transferBootstrap) export(treedist) export(upgma) -export(vcov.pml) export(wRF.dist) export(wpgma) export(write.nexus.dist) diff --git a/R/pml_generics.R b/R/pml_generics.R index 1b38e94c..28cf68fd 100644 --- a/R/pml_generics.R +++ b/R/pml_generics.R @@ -47,6 +47,7 @@ anova.pml <- function(object, ...) { } +#' @rdname pml #' @export vcov.pml <- function(object, ...) { FI <- score(object, FALSE)[[2]] diff --git a/man/pml.Rd b/man/pml.Rd index 679bbbfc..8f32357e 100644 --- a/man/pml.Rd +++ b/man/pml.Rd @@ -1,9 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelTest.R, R/phylo.R +% Please edit documentation in R/modelTest.R, R/phylo.R, R/pml_generics.R \name{as.pml} \alias{as.pml} \alias{pml} \alias{optim.pml} +\alias{vcov.pml} \title{Likelihood of a tree.} \usage{ as.pml(x, ...) @@ -16,6 +17,8 @@ optim.pml(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, optRooted = FALSE, control = pml.control(), model = NULL, rearrangement = ifelse(optNni, "NNI", "none"), subs = NULL, ratchet.par = ratchet.control(), ...) + +\method{vcov}{pml}(object, ...) } \arguments{ \item{x}{So far only an object of class \code{modelTest}.} From 998c93fa3125d0f8aa01fbff2a30c7dd1f4dee9a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 16 Feb 2024 13:14:02 +0100 Subject: [PATCH 110/216] use shared ape data --- R/plotAnc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plotAnc.R b/R/plotAnc.R index 49da06cd..1640a4cb 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -72,7 +72,7 @@ plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, if(!is.null(scheme)){ scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", "Transmembrane_tendency", "Ape_NT", "RY_NT")) - sc <- get(scheme, environment(pml)) + sc <- get(scheme, environment(ace)) P <- getTransition(sc, levels) y <- y %*% P levels <- colnames(P) From 4b07aac950d8f8161b8756df8714800a18918db8 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 21 Feb 2024 15:00:47 +0100 Subject: [PATCH 111/216] fixed esoteric error for invariant sites --- R/phylo.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/phylo.R b/R/phylo.R index 30be873c..2d24ba54 100644 --- a/R/phylo.R +++ b/R/phylo.R @@ -129,10 +129,14 @@ optimGamma <- function(tree, data, shape = 1, k = 4, ...) { } -optimInv <- function(tree, data, inv = 0.01, INV = NULL, ll.0 = NULL, ...) { +optimInv <- function(tree, data, inv = 0.01, INV, ...) { + weight <- as.double(attr(data, "weight")) + tmp <- as.vector( INV %*% rep(1, attr(data, "nc")) ) + ind <- which(tmp > 0) + max_inv <- sum(weight[ind]) / (sum(weight) + 1e-8) fn <- function(inv, tree, data, ...) pml.fit(tree, data, inv = inv, INV = INV, ll.0 = NULL, ...) - res <- optimize(f = fn, interval = c(0, 1), lower = 0, upper = 1, + res <- optimize(f = fn, interval = c(0, max_inv), lower = 0, upper = max_inv, maximum = TRUE, tol = .0001, tree = tree, data = data, ...) res } From ad146936f6f57dcc7e44422f51c2597572c2f917 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 23 Feb 2024 10:16:11 +0100 Subject: [PATCH 112/216] small updates --- inst/tinytest/test_pmlMix.R | 2 +- inst/tinytest/test_pmlPart.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tinytest/test_pmlMix.R b/inst/tinytest/test_pmlMix.R index 69c7bd14..74422ed7 100644 --- a/inst/tinytest/test_pmlMix.R +++ b/inst/tinytest/test_pmlMix.R @@ -121,4 +121,4 @@ fits <- list() for(i in 1:2) fits[[i]] <- pml(tree, X, wMix=.5) fitMixture <- pmlMix( ~ inv, fits, m=2, control=pml.control(maxit = 25, trace=0)) -expect_equal(logLik(fitMixture)[1], ll) +expect_equal(logLik(fitMixture)[1], ll, 1e-3) diff --git a/inst/tinytest/test_pmlPart.R b/inst/tinytest/test_pmlPart.R index b20de0e2..ff7e2daf 100644 --- a/inst/tinytest/test_pmlPart.R +++ b/inst/tinytest/test_pmlPart.R @@ -143,7 +143,7 @@ expect_equal(Q, sp$fits[[1]]$Q, tolerance=5e-4) # weights0 <- 1000*exp(fit0$siteLik) sp <- pmlPart( ~ inv, fit0, weight=W, control = pml.control(trace=0)) expect_equal(logLik(sp)[1], logLik(fit1)[1]*3, tolerance=5e-4 ) - expect_equal(inv, sp$fits[[1]]$inv, tolerance=5e-5) + expect_equal(inv, sp$fits[[1]]$inv, tolerance=5e-4) From 20a83c54153b2169e8fd4b7b6dbd2df937617ae8 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 23 Feb 2024 10:28:58 +0100 Subject: [PATCH 113/216] small improvement --- R/maxCladeCred.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/maxCladeCred.R b/R/maxCladeCred.R index 0b08c7fa..79ef2ead 100644 --- a/R/maxCladeCred.R +++ b/R/maxCladeCred.R @@ -109,7 +109,7 @@ mcc <- maxCladeCred #' @export allCompat <- function(x, rooted=FALSE) { x <- .compressTipLabel(x) - if(!rooted) x <- unroot(x) + if(!rooted) x <- unroot(x, collapse.singles=TRUE) l <- length(x) nt <- Ntip(x[[1]]) pp <- prop.part(x) From 1c6606cd8a9246f41e90f049d1f2e6a37e0d824d Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 23 Feb 2024 10:29:49 +0100 Subject: [PATCH 114/216] add immproved lower bound by default --- man/bab.Rd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/man/bab.Rd b/man/bab.Rd index 923d0cf2..4f90ab72 100644 --- a/man/bab.Rd +++ b/man/bab.Rd @@ -5,7 +5,7 @@ \alias{BranchAndBound} \title{Branch and bound for finding all most parsimonious trees} \usage{ -bab(data, tree = NULL, trace = 0, ...) +bab(data, tree = NULL, trace = 0, ILBound = TRUE, ...) } \arguments{ \item{data}{an object of class phyDat.} @@ -15,6 +15,9 @@ pratchet search is performed.} \item{trace}{defines how much information is printed during optimization.} +\item{ILBound}{compute incompatibility lower bound (default TRUE) of +Holland (2005).} + \item{\dots}{Further arguments passed to or from other methods} } \value{ From ee876f7ec15b05647e5b5103d784b7c5451510ac Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 5 Mar 2024 14:39:27 +0100 Subject: [PATCH 115/216] clean up --- R/ancestral_pml.R | 8 +++++ R/linkfun.R | 88 +++++++++++++++++++++++++++++++++++++++++++++++ R/simSeq.R | 4 +-- 3 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 R/linkfun.R diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R index 0b2958ce..2f959f08 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral_pml.R @@ -404,3 +404,11 @@ makeAncNodeLabel <- function(tree, ...){ tree <- makeNodeLabel(tree, ...) c(tree$tip.label, tree$node.label) } + + +identical_sites <- function(x){ + res <- rep(TRUE, attr(x, "nr")) + for(i in seq_along(x)) res <- res & (x[[i]] == x[[1]]) + which(res) +} + diff --git a/R/linkfun.R b/R/linkfun.R new file mode 100644 index 00000000..2a1c9cff --- /dev/null +++ b/R/linkfun.R @@ -0,0 +1,88 @@ +#' Family objects for evolutionary models +#' +#' Family objects provide a convenient way to specify the details of the models +#' used by functions such as \code{glm}. \code{binomial_mk} extends the +#' \code{binomial} family with the Mk model. The 2 state model is also known +#' Neyman and 4 state model as the Jukes Cantor model. +#' See the documentation for \code{\link{family}} for more details and +#' \code{\link{glm}} for the details on how such model fitting takes place. +#' +#' The link function for the Jukes Cantor 4 state model is +#' \deqn{g(\mu) = -0.75 \cdot log(1-(4/3)\cdot\mu)}{-0.75 * log(1 - (4/3) * mu)} +#' +#' @seealso \code{\link{family}}, \code{\link{binomial}}, \code{\link{glm}} +#' @param k number of states. +#' @importFrom stats binomial dbinom +#' @examples +#' plot(function(x) binomial_mk()$linkinv(x), 0, 1.5, , ylim=c(0, 1), asp=1, +#' main = "inverse link function (JC69)", ylab="f(x)") +#' abline(0, 1) +#' abline(h=.75) +#' plot(function(x) binomial_mk()$linkfun(x), 0, .75, +#' main = "link function (JC69)", ylab="f(x)") +#' +#' data(yeast) +#' dm <- dist.hamming(yeast, FALSE) +#' y <- cbind(dm, 127026 - dm) +#' tree <- nj(dm) +#' X <- designTree(tree) +#' glm(y ~ X -1, binomial_mk()) +#' @noRd +binomial_mk <- function (k=4) +{ + stats <- make_link_mk(k) + variance <- function(mu) mu * (1 - mu) + + tmp <- binomial() + aic <- function(y, n, mu, wt, dev) { + m <- if (any(n > 1)) + n + else wt + -2 * sum(ifelse(m > 0, (wt/m), 0) * dbinom(round(m * + y), round(m), mu, log = TRUE)) + } + # initialize fuer verschiedene Modelle veraendern !!! + initialize <- expression({ + if (NCOL(y) == 1) { + if (is.factor(y)) y <- y != levels(y)[1] + n <- rep.int(1, nobs) + if (any(y < 0 | y > 1)) stop("y values must be 0 <= y <= 1") + mustart <- (weights * y + 0.5)/(2*(weights + 1)) + m <- weights * y + if (any(abs(m - round(m)) > 0.001)) warning("non-integer #successes in a binomial glm!") + } else if (NCOL(y) == 2) { + if (any(abs(y - round(y)) > 0.001)) warning("non-integer counts in a binomial glm!") + n <- y[, 1] + y[, 2] + y <- ifelse(n == 0, 0, y[, 1]/n) + weights <- weights * n + mustart <- (n * y + 0.5)/(2*(n + 1)) + } else stop("for the binomial family, y must be a vector of 0 and 1's\n", + "or a 2 column matrix where col 1 is no. successes and col 2 is no. failures") + }) + structure(list(family = "binomial", link = "Mk", linkfun = stats$linkfun, + linkinv = stats$linkinv, variance = variance, dev.resids = tmp$dev.resids, + aic = aic, mu.eta = stats$mu.eta, initialize = tmp$initialize, + validmu = stats$validmu, valideta = stats$valideta, simulate = tmp$simfun, + dispersion = 1), class = "family") +} + + +make_link_mk <- function (n=4L){ + f <- substitute(ifelse(mu>0, -(k-1)/k * log(1-(k/(k-1))*mu),0), list(k=n)) + linkfun <- as.function(list(f)) + formals(linkfun) <- alist(mu=) + f <- substitute(ifelse(eta>0, (k-1)/k*(1-exp(-(k/(k-1))*eta)),0), list(k=n)) + linkinv <- as.function(list(f)) + formals(linkinv) <- alist(eta=) + f <- substitute(exp(-(k/(k-1))*eta), list(k=n)) + mu.eta <- as.function(list(f)) + formals(mu.eta) <- alist(eta=) + # validmu <- function(mu) all(mu > 0) && all(mu < .5) + f <- substitute(all(mu > 0) && all(mu < (k-1)/k), list(k=n)) + validmu <- as.function(list(f)) + formals(validmu) <- alist(mu=) + valideta <- function(eta) all(eta > 0) + list(linkfun = linkfun, linkinv = linkinv, mu.eta = mu.eta, + valideta = valideta) +} + diff --git a/R/simSeq.R b/R/simSeq.R index 66a8ba0a..111d0142 100644 --- a/R/simSeq.R +++ b/R/simSeq.R @@ -122,8 +122,6 @@ simSeq.phylo <- function(x, l = 1000, Q = NULL, bf = NULL, rootseq = NULL, if (pt == "AA") levels <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V") -# c("a", "r", "n", "d", "c", "q", "e", "g", "h", "i", -# "l", "k", "m", "f", "p", "s", "t", "w", "y", "v") if (pt == "CODON") { .syn <- synonymous_subs(code=code) .sub <- tstv_subs(code=code) @@ -180,7 +178,7 @@ simSeq.phylo <- function(x, l = 1000, Q = NULL, bf = NULL, rootseq = NULL, if (pt == "DNA") return(phyDat.DNA(res, return.index = TRUE)) if (pt == "AA") return(phyDat.AA(res, return.index = TRUE)) if (pt == "USER") return(phyDat.default(res, levels = levels, - return.index = TRUE)) + return.index = TRUE, ...)) if (pt == "CODON") { res <- t(apply(res, 1, function(x) unlist(strsplit(x, "")))) return(phyDat.codon(res)) From 68fd4b55390886a98c32f58b4f1e61049ce19833 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 5 Mar 2024 14:42:33 +0100 Subject: [PATCH 116/216] nicer plots --- R/plotAnc.R | 6 +++++- man/gap_as_state.Rd | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 man/gap_as_state.Rd diff --git a/R/plotAnc.R b/R/plotAnc.R index 1640a4cb..bf5cd1eb 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -73,6 +73,10 @@ plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", "Transmembrane_tendency", "Ape_NT", "RY_NT")) sc <- get(scheme, environment(ace)) + if(has_gap_state(data)){ + sc$properties <- c(sc$properties, Gap="-") + sc$color <- c(sc$color, "#FFFFFF") + } P <- getTransition(sc, levels) y <- y %*% P levels <- colnames(P) @@ -99,5 +103,5 @@ plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, sel = seq_along(XX), thermo = NULL, piecol = col, col = "black", bg = "lightblue", horiz = FALSE, width = NULL, height = NULL, cex = cex.pie ) - if (!is.null(pos)) legend(pos, legend=levels, pch=19, col = col) + if (!is.null(pos)) legend(pos, legend=levels, pch=21, pt.bg = col) } diff --git a/man/gap_as_state.Rd b/man/gap_as_state.Rd new file mode 100644 index 00000000..a02f390f --- /dev/null +++ b/man/gap_as_state.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gap_as_state.R +\name{gap_as_state} +\alias{gap_as_state} +\alias{gap_as_ambiguous} +\title{Treat gaps as a state} +\usage{ +gap_as_state(obj, gap = "-", ambiguous = "?") + +gap_as_ambiguous(obj, gap = "-") +} +\arguments{ +\item{obj}{An object of class phyDat.} + +\item{gap}{a character which codes for the gaps (default is "-").} + +\item{ambiguous}{a character which codes for the ambiguous state} +} +\value{ +The functions return an object of class \code{phyDat}. +} +\description{ +The function \code{gap_as_state} changes the contrast of an phyDat object to +treat as its own state. +} +\examples{ +data(Laurasiatherian) +tmp <- gap_as_state(Laurasiatherian) +contr <- attr(tmp, "contrast") +rownames(contr) <- attr(tmp, "allLevels") +contr +} +\seealso{ +\code{\link{phyDat}} +} +\author{ +Klaus Schliep \email{klaus.schliep@gmail.com} +} +\keyword{cluster} From 65013475270c257b9c8cbde7221d0889a08f27b1 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 5 Mar 2024 15:56:53 +0100 Subject: [PATCH 117/216] improve image (allow gaps as states) --- R/image_phyDat.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/image_phyDat.R b/R/image_phyDat.R index 0f3961bf..c7d52758 100644 --- a/R/image_phyDat.R +++ b/R/image_phyDat.R @@ -9,6 +9,7 @@ #' @method image phyDat #' @export image.phyDat <- function(x, ...){ + x <- gap_as_ambiguous(x) if(attr(x, "type") == "AA") image(as.AAbin(x), ...) if(attr(x, "type") == "DNA") image(as.DNAbin(x), ...) else return(NULL) From 0ea5b08b0d3372d45e3d95433367e59d06051058 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 5 Mar 2024 15:58:35 +0100 Subject: [PATCH 118/216] Ancestral states of constant sites are now always this state --- R/ancestral_pml.R | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R index 2f959f08..1c70afc8 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral_pml.R @@ -148,6 +148,13 @@ ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { } result[[j]] <- tmp } + ind <- identical_sites(data) + if(length(ind)>0){ + for(k in (nTips+1L):m){ + if(return=="prob") result[[k]][ind,] <- result[[1]][ind,] + else result[[k]][ind] <- data[[1]][ind] + } + } attributes(result) <- x attr(result, "call") <- call if(return=="prob") class(result) <- c("ancestral", "phyDat") @@ -223,6 +230,14 @@ ancestral.pars <- function(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), res <- mpr(tree, data, cost = cost, return = return) attr(res, "call") <- call } + data <- data[tree$tip.label,] + ind <- identical_sites(data) + if(length(ind)>0){ + for(k in (Ntip(tree)+1L):length(res)){ + if(return=="prob") result[[k]][ind,] <- result[[1]][ind,] + else result[[k]][ind] <- data[[1]][ind] + } + } res } @@ -411,4 +426,3 @@ identical_sites <- function(x){ for(i in seq_along(x)) res <- res & (x[[i]] == x[[1]]) which(res) } - From d016642a519c5f14fad72f67bce253003ae4a819 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 6 Mar 2024 12:27:57 +0100 Subject: [PATCH 119/216] export glance methods --- R/codonTest.R | 4 +- po/R-phangorn.pot | 432 ++++++++++++++++++++++++++++++++++++++++++++++ po/phangorn.pot | 12 ++ 3 files changed, 446 insertions(+), 2 deletions(-) create mode 100644 po/R-phangorn.pot create mode 100644 po/phangorn.pot diff --git a/R/codonTest.R b/R/codonTest.R index 7303c2d4..7006fe0e 100644 --- a/R/codonTest.R +++ b/R/codonTest.R @@ -133,7 +133,7 @@ codonTest <- function(tree, object, model = c("M0", "M1a", "M2a"), } -# tidy codon +#' @export glance.pml <- function(x, ...) { res <- data.frame(logLik = x$logLik, df = x$df, @@ -142,7 +142,7 @@ glance.pml <- function(x, ...) { res } - +#' @export glance.pmlMix <- function(x, ...) { nr <- attr(x$fits[[1]]$data, "nr") res <- data.frame(logLik = x$logLik, diff --git a/po/R-phangorn.pot b/po/R-phangorn.pot new file mode 100644 index 00000000..3ca8e446 --- /dev/null +++ b/po/R-phangorn.pot @@ -0,0 +1,432 @@ +msgid "" +msgstr "" +"Project-Id-Version: phangorn 3.0.0.0\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2024-02-14 13:59+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#: Densi.R:134 +msgid "x must be of class multiPhylo" +msgstr "" + +#: addConfidences.R:99 treedist.R:244 +msgid "trees have different labels" +msgstr "" + +#: ancestral_pml.R:238 bab.R:278 bab.R:423 baseFreq.R:35 fitch64.R:264 parsimony.R:83 phylo.R:1340 sankoff.R:50 sankoff.R:118 sankoff.R:160 +msgid "data must be of class phyDat" +msgstr "" + +#: ancestral_pml.R:402 +msgid "" +"Node labels are not unique, used makeNodeLabel(tree, ...) to create them!" +msgstr "" + +#: bootstrap.R:214 +msgid "tree has different labels" +msgstr "" + +#: candidate_tree.R:2 +msgid "tau must be >= 0!" +msgstr "" + +#: candidate_tree.R:45 +msgid "Argument tip.dates is missing!" +msgstr "" + +#: clanistic.R:430 +msgid "The variable names have changed" +msgstr "" + +#: clanistic.R:514 +msgid "contrasts are not defined for 0 degrees of freedom" +msgstr "" + +#: codon.R:60 codon.R:79 dist.p.R:73 distSeq.R:67 distSeq.R:115 distSeq.R:234 +msgid "x must be of class phyDat" +msgstr "" + +#: codon.R:61 +msgid "x must be a nucleotide sequence!" +msgstr "" + +#: delta.score.R:61 +msgid "return options are: all, mean, or sd" +msgstr "" + +#: dist.p.R:119 +msgid "Levels of x are not in levels of cost matrix!" +msgstr "" + +#: distTree.R:384 +msgid "tree must be binary" +msgstr "" + +#: distTree.R:588 +msgid "invalid splits method" +msgstr "" + +#: distTree.R:589 +msgid "ambiguous splits method" +msgstr "" + +#: draw_CI.R:2 superTree.R:4 transferBootstrap.R:34 +msgid "trees must be of class multiPhylo" +msgstr "" + +#: draw_CI.R:10 draw_CI.R:123 +msgid "All trees need to be rooted!" +msgstr "" + +#: fitch64.R:253 phylo.R:1334 plotBS.R:112 sankoff.R:157 +msgid "tree must be of class phylo" +msgstr "" + +#: hadamard.R:187 hadamard.R:272 +msgid "Hadamard conjugation works only efficient for n < 24" +msgstr "" + +#: hadamard.R:212 +msgid "obj must be of class phyDat" +msgstr "" + +#: hadamard.R:213 hadamard.R:265 hadamard.R:266 +msgid "Error" +msgstr "" + +#: hadamard.R:221 +msgid "4-state Hadamard conjugation works only efficient for n < 12" +msgstr "" + +#: hash_phylo.R:9 +msgid "x must be rooted" +msgstr "" + +#: linkfun.R:49 +msgid "y values must be 0 <= y <= 1" +msgstr "" + +#: linkfun.R:52 +msgid "non-integer #successes in a binomial glm!" +msgstr "" + +#: linkfun.R:54 +msgid "non-integer counts in a binomial glm!" +msgstr "" + +#: linkfun.R:59 +msgid "for the binomial family, y must be a vector of 0 and 1's" +msgstr "" + +#: linkfun.R:60 +msgid "" +"or a 2 column matrix where col 1 is no. successes and col 2 is no. failures" +msgstr "" + +#: modelTest.R:96 +msgid "Labels in tree and data differ!" +msgstr "" + +#: networx.R:721 networx.R:726 networx.R:731 +msgid "split.color must be same length as splits" +msgstr "" + +#: networx.R:739 +msgid "type='3D' requires the package 'rgl', plotting in '2D' instead!" +msgstr "" + +#: phyDat.R:101 phyDat.R:171 +msgid "Alignments must have same type!" +msgstr "" + +#: phyDat.R:170 +msgid "Alignments have different # of characters!" +msgstr "" + +#: phyDat.R:173 +msgid "Duplicated names!" +msgstr "" + +#: phyDat.R:257 phyDat.R:263 phyDat.R:265 +msgid "subscript out of bounds" +msgstr "" + +#: phyDat.R:386 +msgid "each site contains at least one ambiguous state!" +msgstr "" + +#: phyDat.R:544 +msgid "the first line of the file must contain the dimensions of the data" +msgstr "" + +#: phyDat2.R:94 +msgid "data object must contain taxa names" +msgstr "" + +#: phyDat2.R:127 +msgid "Either argument levels or contrast has to be supplied" +msgstr "" + +#: phyDat2.R:146 phyDat2.R:227 +msgid "" +"Found unknown characters (not supplied in levels). Deleted sites with " +"unknown states." +msgstr "" + +#: phyDat2.R:321 +msgid "Found unknown characters. Deleted sites with unknown states." +msgstr "" + +#: phyDat_conversion.R:301 +msgid "x must be a nucleotide sequence" +msgstr "" + +#: phyDat_conversion.R:309 +msgid "x must be a amino acid sequence" +msgstr "" + +#: phylo.R:491 +msgid "model must be a formula object" +msgstr "" + +#: phylo.R:769 phylo.R:1374 +msgid "F3x4 not available for this data type" +msgstr "" + +#: phylo.R:771 phylo.R:1376 +msgid "F1x4 not available for this data type" +msgstr "" + +#: phylo.R:773 phylo.R:1378 +msgid "F61 not available for this data type" +msgstr "" + +#: phylo.R:992 +msgid "Error Mkv" +msgstr "" + +#: phylo.R:1335 +msgid "tree must have edge weights" +msgstr "" + +#: phylo.R:1336 +msgid "tree must have unique labels!" +msgstr "" + +#: phylo.R:1342 +msgid "tip labels are not in data" +msgstr "" + +#: phylo.R:1658 phylo.R:2093 +msgid "tree must be rooted" +msgstr "" + +#: phylo.R:2046 +msgid "I unrooted the tree" +msgstr "" + +#: phylo.R:2080 +msgid "rooted / unrooted tree needs at least 2 / 3 tips" +msgstr "" + +#: phylo.R:2088 +msgid "" +"You can't optimize edges and rates at the same time, only edges are " +"optimized!" +msgstr "" + +#: phylo.R:2167 +msgid "only one rate class, ignored optGamma" +msgstr "" + +#: phylo.R:2171 +msgid "cannot estimate invariant sites and Mkv model, ignored optInv" +msgstr "" + +#: plotAnc.R:63 +msgid "Tree needs nodelabel, which match the labels of the alignment!" +msgstr "" + +#: plotAnc.R:95 +msgid "Length of color vector differs from number of levels!" +msgstr "" + +#: plotBS.R:14 +msgid "All trees need to be rooted for method 'MCC'!" +msgstr "" + +#: plotBS.R:122 +msgid "" +"You need to supply 'trees' or the tree needs support-values as node.label" +msgstr "" + +#: pmlPart.R:254 +msgid "x must be of class 'multiphyDat' or a list of 'phyDat' objects" +msgstr "" + +#: pmlPen.R:6 +msgid "object has to be of class pmlPart or pmlMix" +msgstr "" + +#: pml_bb.R:84 +msgid "Please supply a model!" +msgstr "" + +#: pml_bb.R:128 +msgid "Could not find model!" +msgstr "" + +#: pml_bb.R:130 +msgid "Error, found several models!" +msgstr "" + +#: pml_bb.R:204 +msgid "Some parameters are unknown" +msgstr "" + +#: pml_control.R:63 +msgid "value of 'epsilon' must be > 0" +msgstr "" + +#: pml_control.R:65 pml_control.R:78 +msgid "maximum number of iterations must be > 0" +msgstr "" + +#: pml_control.R:67 +msgid "tau must be > 0" +msgstr "" + +#: pml_control.R:80 +msgid "minimum number of iterations must be > 0" +msgstr "" + +#: pml_control.R:82 +msgid "number of iterations must be > 0" +msgstr "" + +#: pml_control.R:84 +msgid "proportion of rearrangenemts must be > 0" +msgstr "" + +#: read.nexus.partitions.R:75 +msgid "does not contain Charset!" +msgstr "" + +#: read.nexus.splits.R:317 +msgid "File does not contain network block, return only splits!" +msgstr "" + +#: read.nexus.splits.R:320 +msgid "File does not contain network block!" +msgstr "" + +#: read.nexus.splits.R:438 +msgid "Problem" +msgstr "" + +#: read.phyDat.R:90 +msgid "sequences have different length" +msgstr "" + +#: simSeq.R:99 +msgid "l must be greater than 0!" +msgstr "" + +#: simSeq.R:139 +msgid "levels have to be supplied if type is USER" +msgstr "" + +#: splits.R:126 +msgid "x and y have different labels!" +msgstr "" + +#: splits.R:156 +msgid "names do not match previous names" +msgstr "" + +#: splits.R:279 +msgid "No split object included!" +msgstr "" + +#: splits.R:436 +msgid "obj must be of class splits" +msgstr "" + +#: splits.R:472 +msgid "x must be of class splits" +msgstr "" + +#: splits.R:475 +msgid "y must be of class splits" +msgstr "" + +#: treeManipulation.R:19 +msgid "There are apparently two root edges in your tree" +msgstr "" + +#: treeManipulation.R:133 +msgid "tree needs edge length" +msgstr "" + +#: treeManipulation.R:224 +msgid "no node labels" +msgstr "" + +#: treeManipulation.R:540 +#, c-format +msgid "That would generate %d trees, and take up more than %d MB of memory!" +msgstr "" + +#: treeManipulation.R:544 +msgid "A tree must have at least two taxa." +msgstr "" + +#: treeManipulation.R:547 +msgid "An unrooted tree must have at least three taxa." +msgstr "" + +#: treeManipulation.R:645 +msgid "Can't find supplied node in the labels" +msgstr "" + +#: treeManipulation.R:855 +msgid "one tree has a different number of tips" +msgstr "" + +#: treeManipulation.R:858 +msgid "one tree has different tip labels" +msgstr "" + +#: treeRearrangement.R:117 +msgid "Not enough edges for NNI rearrangements" +msgstr "" + +#: treeRearrangement.R:202 +msgid "k is chosen too big" +msgstr "" + +#: treeRearrangement.R:215 +msgid "trees must be binary" +msgstr "" + +#: treedist.R:176 treedist.R:253 treedist.R:373 treedist.R:526 treedist.R:573 treedist.R:629 +msgid "Some trees are not binary. Result may not what you expect!" +msgstr "" + +#: treedist.R:263 +msgid "number of bipartitions given to C_sprdist are not the same" +msgstr "" + +#: treedist.R:363 treedist.R:621 treedist.R:688 +msgid "one tree is unrooted, unrooted both" +msgstr "" + +#: treedist.R:409 treedist.R:463 treedist.R:521 treedist.R:563 treedist.R:725 treedist.R:773 +msgid "some trees were rooted, unrooted all" +msgstr "" diff --git a/po/phangorn.pot b/po/phangorn.pot new file mode 100644 index 00000000..fc82b5f4 --- /dev/null +++ b/po/phangorn.pot @@ -0,0 +1,12 @@ +msgid "" +msgstr "" +"Project-Id-Version: phangorn 3.0.0.0\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2024-02-14 13:59+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" From ad9117ab8df0872241ca6e940812d1fbc78fc290 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 6 Mar 2024 12:37:39 +0100 Subject: [PATCH 120/216] add gap_as_state --- R/gap_as_state.R | 100 ++++++++++++++++++++++++++++++++++++++++++++ man/gap_as_state.Rd | 4 +- 2 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 R/gap_as_state.R diff --git a/R/gap_as_state.R b/R/gap_as_state.R new file mode 100644 index 00000000..aa346256 --- /dev/null +++ b/R/gap_as_state.R @@ -0,0 +1,100 @@ +#' Treat gaps as a state +#' +#' The function \code{gap_as_state} changes the contrast of an phyDat object to +#' treat as its own state. Internally \code{phyDat} are stored similar to a +#' \code{factor} objects and only the contrast matrix and some attributes +#' change. +#' +#' @param obj An object of class phyDat. +#' @param gap a character which codes for the gaps (default is "-"). +#' @param ambiguous a character which codes for the ambiguous state +#' @return The functions return an object of class \code{phyDat}. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link{phyDat}} +#' @keywords cluster +#' @examples +#' data(Laurasiatherian) +#' tmp <- gap_as_state(Laurasiatherian) +#' contr <- attr(tmp, "contrast") +#' rownames(contr) <- attr(tmp, "allLevels") +#' contr +#' @rdname gap_as_state +#' @export +gap_as_state <- function(obj, gap="-", ambiguous="?"){ + if(has_gap_state(obj)) return(obj) +# if(!is.null(attr(obj, "gap_is_state")) & isTRUE(attr(obj, "gap_is_state"))) +# return(obj) + contrast <- cbind(attr(obj, "contrast"), gap = 0) + levels <- c(attr(obj, "levels"), gap) + colnames(contrast) <- levels + rownames(contrast) <- attr(obj, "allLevels") + contrast[gap, ] <- 0 + contrast[gap, gap] <- 1 + # todo check for ambiguous + contrast[ambiguous, "-"] <- 1 + rownames(contrast) <- NULL + attr(obj, "levels") <- levels + attr(obj, "nc") <- attr(obj, "nc") + 1L + attr(obj, "contrast") <- contrast + attr(obj, "gap_is_state") <- TRUE + obj +} + + +#' @rdname gap_as_state +#' @export +gap_as_ambiguous <- function(obj, gap="-"){ + if(!has_gap_state(obj)) return(obj) +# if(is.null(attr(obj, "gap_is_state")) | !isTRUE(attr(obj, "gap_is_state"))) +# return(obj) + contrast <- attr(obj, "contrast") + levels <- attr(obj, "levels") +# colnames(contrast) <- levels + rownames(contrast) <- attr(obj, "allLevels") + contrast[gap, ] <- 1 + rownames(contrast) <- NULL + ind <- match(gap, levels) + contrast <- contrast[,-ind] + attr(obj, "levels") <- levels[-ind] + attr(obj, "contrast") <- contrast + attr(obj, "nc") <- attr(obj, "nc") - 1L + attr(obj, "gap_is_state") <- FALSE + obj +} + + +has_gap_state <- function(obj){ + type <- attr(obj, "type") + if(type=="DNA" && attr(obj, "nc")==5) return(TRUE) + if(type=="AA" && attr(obj, "nc")==21) return(TRUE) + FALSE +} + + +add_gap_Q_AA <- function(Q, rate_gap=0.1){ + res <- matrix(0, 20, 20) + res[lower.tri(res)] <- Q + res <- cbind(rbind(res,rate_gap), 0) + res <- res[lower.tri(res)] + res +} + + +add_gap_bf_AA <- function(bf, gap=.01){ + bf <- c(bf, gap) + bf <- bf / sum(bf) + bf +} + + +remove_similar <- function(x, k=3, index=FALSE){ + dm <- dist.hamming(x, FALSE) + dm <- as.matrix(dm) + ind_dist <- which(dm < (k+1), arr.ind = TRUE) + ind_dist <- ind_dist[ind_dist[,1] < ind_dist[,2], ] + dist_i <- unique(ind_dist[, 2]) + if(index) return(dist_i) + x[-dist_i] +} + + diff --git a/man/gap_as_state.Rd b/man/gap_as_state.Rd index a02f390f..e5ea34b1 100644 --- a/man/gap_as_state.Rd +++ b/man/gap_as_state.Rd @@ -21,7 +21,9 @@ The functions return an object of class \code{phyDat}. } \description{ The function \code{gap_as_state} changes the contrast of an phyDat object to -treat as its own state. +treat as its own state. Internally \code{phyDat} are stored similar to a +\code{factor} objects and only the contrast matrix and some attributes +change. } \examples{ data(Laurasiatherian) From 7b634b5abed95df203c0efc4224f4a0bcf583a45 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 6 Mar 2024 12:38:20 +0100 Subject: [PATCH 121/216] update --- NAMESPACE | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 642acc15..4b90f9fe 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,8 @@ S3method(cbind,phyDat) S3method(cophenetic,networx) S3method(cophenetic,splits) S3method(glance,phyDat) +S3method(glance,pml) +S3method(glance,pmlMix) S3method(hash,multiPhylo) S3method(hash,phylo) S3method(identify,networx) @@ -150,6 +152,8 @@ export(dna2codon) export(edQt) export(fhm) export(fitch) +export(gap_as_ambiguous) +export(gap_as_state) export(genlight2phyDat) export(getClans) export(getClips) From 02e31936abe1673294ea770fa62f7970bcd208a2 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 7 Mar 2024 08:17:43 +0100 Subject: [PATCH 122/216] bug fix --- R/ancestral_pml.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R index 1c70afc8..328d9daf 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral_pml.R @@ -234,8 +234,8 @@ ancestral.pars <- function(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), ind <- identical_sites(data) if(length(ind)>0){ for(k in (Ntip(tree)+1L):length(res)){ - if(return=="prob") result[[k]][ind,] <- result[[1]][ind,] - else result[[k]][ind] <- data[[1]][ind] + if(return=="prob") res[[k]][ind,] <- res[[1]][ind,] + else res[[k]][ind] <- data[[1]][ind] } } res From e069e460cbdfdfc8ff826c5cef0b88fde87fac09 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 7 Mar 2024 16:54:06 +0100 Subject: [PATCH 123/216] small improvements --- R/plotAnc.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/plotAnc.R b/R/plotAnc.R index bf5cd1eb..73af60ff 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -73,10 +73,17 @@ plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", "Transmembrane_tendency", "Ape_NT", "RY_NT")) sc <- get(scheme, environment(ace)) - if(has_gap_state(data)){ + if(has_gap_state(data) && attr(data, "type")=="AA"){ sc$properties <- c(sc$properties, Gap="-") sc$color <- c(sc$color, "#FFFFFF") } + if(attr(data, "type")=="DNA"){ + ind <- match("n", names(sc$properties)) + if(!is.na(ind)){ + sc$properties <- sc$properties[-ind] + sc$color <- sc$color[-ind] + } + } P <- getTransition(sc, levels) y <- y %*% P levels <- colnames(P) From 4bf2863bcecacf7a7bddf8dec96d211af01fb570 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 7 Mar 2024 17:01:47 +0100 Subject: [PATCH 124/216] add composition_test --- R/baseFreq.R | 23 +++++++++++++++++++++++ man/baseFreq.Rd | 6 ++++++ 2 files changed, 29 insertions(+) diff --git a/R/baseFreq.R b/R/baseFreq.R index ebe332e8..8bcbaa08 100644 --- a/R/baseFreq.R +++ b/R/baseFreq.R @@ -3,6 +3,8 @@ #' \code{baseFreq} computes the frequencies (absolute or relative) of the states #' from a sample of sequences. #' \code{glance} computes some useful information about the alignment. +#' \code{composition_test} computes a \eqn{\Chi^2}-test testing if the state +#' composition for a species differs. #' #' @param obj,x as object of class phyDat #' @param freq logical, if 'TRUE', frequencies or counts are returned otherwise @@ -28,6 +30,7 @@ #' baseFreq(chloroplast) #' glance(Laurasiatherian) #' glance(chloroplast) +#' composition_test(Laurasiatherian)[1:10,] #' @rdname baseFreq #' @export baseFreq <- function(obj, freq=FALSE, all=FALSE, drop.unused.levels = FALSE){ @@ -74,3 +77,23 @@ glance.phyDat <- function (x, ...){ parsimony_informative_sites=parsimony_informative_sites, const_sites=const_site(x)) } + + +#' @rdname baseFreq +#' @export +composition_test <- function(obj){ + stopifnot(inherits(obj,"phyDat")) + labels <- attr(obj, "allLevels") + levs <- attr(obj, "levels") + weight <- attr(obj,"weight") + n <- length(obj) + ALL <- baseFreq(obj, freq=TRUE) + res <- matrix(0, n, 3, dimnames = list(names(obj), + c("statistic", "parameter df", "p-value"))) + for(i in seq_len(n)){ + tmp <- baseFreq(obj[i], freq=TRUE) + res[i, ] <- unlist(chisq.test(rbind(ALL-tmp, tmp))[1:3]) + } + res +} + diff --git a/man/baseFreq.Rd b/man/baseFreq.Rd index 970d5082..c071344a 100644 --- a/man/baseFreq.Rd +++ b/man/baseFreq.Rd @@ -3,11 +3,14 @@ \name{baseFreq} \alias{baseFreq} \alias{glance.phyDat} +\alias{composition_test} \title{Summaries of alignments} \usage{ baseFreq(obj, freq = FALSE, all = FALSE, drop.unused.levels = FALSE) \method{glance}{phyDat}(x, ...) + +composition_test(obj) } \arguments{ \item{obj, x}{as object of class phyDat} @@ -31,6 +34,8 @@ contrast.} \code{baseFreq} computes the frequencies (absolute or relative) of the states from a sample of sequences. \code{glance} computes some useful information about the alignment. +\code{composition_test} computes a \eqn{\Chi^2}-test testing if the state +composition for a species differs. } \examples{ @@ -43,6 +48,7 @@ baseFreq(Laurasiatherian, freq=TRUE) baseFreq(chloroplast) glance(Laurasiatherian) glance(chloroplast) +composition_test(Laurasiatherian)[1:10,] } \seealso{ \code{\link{phyDat}, \link{base.freq}, \link{glance}} From 7d02ccc94f5167067bdeec2b2bb429cb1aa6403a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 7 Mar 2024 17:01:47 +0100 Subject: [PATCH 125/216] add composition_test --- NAMESPACE | 2 ++ R/baseFreq.R | 24 ++++++++++++++++++++++++ man/baseFreq.Rd | 6 ++++++ 3 files changed, 32 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 4b90f9fe..05a28e27 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -131,6 +131,7 @@ export(coalSpeciesTree) export(codon2dna) export(codonTest) export(compatible) +export(composition_test) export(consensusNet) export(coords) export(createLabel) @@ -299,6 +300,7 @@ importFrom(stats,BIC) importFrom(stats,aggregate) importFrom(stats,as.dist) importFrom(stats,binomial) +importFrom(stats,chisq.test) importFrom(stats,constrOptim) importFrom(stats,cophenetic) importFrom(stats,dbinom) diff --git a/R/baseFreq.R b/R/baseFreq.R index ebe332e8..1522cf73 100644 --- a/R/baseFreq.R +++ b/R/baseFreq.R @@ -3,6 +3,8 @@ #' \code{baseFreq} computes the frequencies (absolute or relative) of the states #' from a sample of sequences. #' \code{glance} computes some useful information about the alignment. +#' \code{composition_test} computes a \eqn{\Chi^2}-test testing if the state +#' composition for a species differs. #' #' @param obj,x as object of class phyDat #' @param freq logical, if 'TRUE', frequencies or counts are returned otherwise @@ -28,6 +30,7 @@ #' baseFreq(chloroplast) #' glance(Laurasiatherian) #' glance(chloroplast) +#' composition_test(Laurasiatherian)[1:10,] #' @rdname baseFreq #' @export baseFreq <- function(obj, freq=FALSE, all=FALSE, drop.unused.levels = FALSE){ @@ -74,3 +77,24 @@ glance.phyDat <- function (x, ...){ parsimony_informative_sites=parsimony_informative_sites, const_sites=const_site(x)) } + + +#' @rdname baseFreq +#' @importFrom stats chisq.test +#' @export +composition_test <- function(obj){ + stopifnot(inherits(obj,"phyDat")) + labels <- attr(obj, "allLevels") + levs <- attr(obj, "levels") + weight <- attr(obj,"weight") + n <- length(obj) + ALL <- baseFreq(obj, freq=TRUE) + res <- matrix(0, n, 3, dimnames = list(names(obj), + c("statistic", "parameter df", "p-value"))) + for(i in seq_len(n)){ + tmp <- baseFreq(obj[i], freq=TRUE) + res[i, ] <- unlist(chisq.test(rbind(ALL-tmp, tmp))[1:3]) + } + res +} + diff --git a/man/baseFreq.Rd b/man/baseFreq.Rd index 970d5082..c071344a 100644 --- a/man/baseFreq.Rd +++ b/man/baseFreq.Rd @@ -3,11 +3,14 @@ \name{baseFreq} \alias{baseFreq} \alias{glance.phyDat} +\alias{composition_test} \title{Summaries of alignments} \usage{ baseFreq(obj, freq = FALSE, all = FALSE, drop.unused.levels = FALSE) \method{glance}{phyDat}(x, ...) + +composition_test(obj) } \arguments{ \item{obj, x}{as object of class phyDat} @@ -31,6 +34,8 @@ contrast.} \code{baseFreq} computes the frequencies (absolute or relative) of the states from a sample of sequences. \code{glance} computes some useful information about the alignment. +\code{composition_test} computes a \eqn{\Chi^2}-test testing if the state +composition for a species differs. } \examples{ @@ -43,6 +48,7 @@ baseFreq(Laurasiatherian, freq=TRUE) baseFreq(chloroplast) glance(Laurasiatherian) glance(chloroplast) +composition_test(Laurasiatherian)[1:10,] } \seealso{ \code{\link{phyDat}, \link{base.freq}, \link{glance}} From b700fcc9e76b84fbd4b6e2ac2f76ef269a25ab20 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 18 Mar 2024 04:45:03 +0100 Subject: [PATCH 126/216] clean up --- R/bab.R | 103 ++++++++++++++++++++++++++------------------------------ 1 file changed, 47 insertions(+), 56 deletions(-) diff --git a/R/bab.R b/R/bab.R index 1f4bb31c..85aba682 100644 --- a/R/bab.R +++ b/R/bab.R @@ -16,12 +16,10 @@ getOrder <- function(x) { nr <- attr(x, "nr") storage.mode(nr) <- "integer" -# n <- length(x) #- 1L weight <- attr(x, "weight") storage.mode(weight) <- "double" -# m <- nr * (2L * nTips - 2L) f <- init_fitch(x, FALSE, FALSE, m=4L) edge <- tree$edge @@ -36,9 +34,7 @@ getOrder <- function(x) { while (length(remaining) > 0) { edge <- tree$edge[, 2] + 2 * nTips - f$prep_spr(tree$edge) - l <- length(remaining) res <- numeric(l) nt <- numeric(l) @@ -57,7 +53,21 @@ getOrder <- function(x) { } -pBound <- function(x, UB, LB) { +seq_stats <- function(x){ + nr <- attr(x, "nr") + contrast <- attr(x, "contrast") + a <- seq_len(nr) + STATE <- POS <- matrix(0L, nrow(contrast), nr) + for(i in seq_along(x)){ + IND <- cbind(x[[i]], a) + STATE[IND] <- STATE[IND] + 1L + POS[IND] <- i + } + list(state=STATE, position=POS) +} + +# Incompatibility lower Bound +ilb <- function(x, LB) { nr <- attr(x, "nr") contrast <- attr(x, "contrast") rownames(contrast) <- attr(x, "allLevels") @@ -70,6 +80,7 @@ pBound <- function(x, UB, LB) { singles <- attr(x, "levels") fun2 <- function(x, singles) all(x %in% singles) fun1 <- function(x) cumsum(!duplicated(x)) - 1L + fun3 <- function(x) sum(!duplicated(x)) - 1L tmp <- apply(y, 2, fun2, singles) ind <- which(tmp) @@ -77,32 +88,18 @@ pBound <- function(x, UB, LB) { y <- y[, ind, drop = FALSE] weight0 <- weight0[ind] -# print(sum(weight0)) - UB <- UB[, ind, drop = FALSE] - single_dis <- apply(y, 2, fun1) - # single_dis <- LB - + single_dis <- LB[, ind] nTips <- nrow(y) l <- length(weight0) res <- numeric(nTips) - for (i in 1:(l - 1)) { for (j in (i + 1):l) { - # cat(i, j, "\n") if ((weight0[i] > 0) & (weight0[j] > 0)) { z <- paste(y[, i], y[, j], sep = "_") dis2 <- single_dis[, i] + single_dis[, j] - # D1 <- (dis2[nTips] - dis2) dis <- fun1(z) - # dis <- pmax(dis, dis2) - # D2 <- dis[nTips] - (UB[, i] + UB[, j]) if (dis[nTips] > dis2[nTips]) { -# ub <- UB[, i] + UB[, j] -# dis <- dis[nTips] - ub -# d2 <- dis2[nTips] - dis2 -# dis <- pmax(dis, d2) - d2 - # dis <- pmax(dis, dis2) - dis2 - dis <- dis - dis2 + dis <- pmax(dis, dis2) - dis2 if (sum(dis[4:nTips]) > 0) { wmin <- min(weight0[i], weight0[j]) weight0[i] <- weight0[i] - wmin @@ -114,12 +111,10 @@ pBound <- function(x, UB, LB) { if(weight0[i] < 1e-6) break() } } -# print(sum(weight0)) res } - #' Branch and bound for finding all most parsimonious trees #' #' \code{bab} finds all most parsimonious trees. @@ -140,6 +135,8 @@ pBound <- function(x, UB, LB) { #' @param tree a phylogenetic tree an object of class phylo, otherwise a #' pratchet search is performed. #' @param trace defines how much information is printed during optimization. +## @param ILBound compute incompatibility lower bound (default TRUE) of +## Holland (2005). #' @param \dots Further arguments passed to or from other methods #' @return \code{bab} returns all most parsimonious trees in an object of class #' \code{multiPhylo}. @@ -165,18 +162,19 @@ pBound <- function(x, UB, LB) { #' gene12 <- yeast[, 1:3158] #' trees <- bab(gene12) #' -#' @export bab +#' @export bab <- function(data, tree = NULL, trace = 0, ...) { + if (hasArg(ILBound)) + ILBound <- list(...)$ILBound + else ILBound <- FALSE if(inherits(data, "DNAbin") | inherits(data, "AAbin")) data <- as.phyDat(data) if (!inherits(data, "phyDat")) stop("data must be of class phyDat") - if (!is.null(tree)) data <- subset(data, tree$tip.label) - pBound <- FALSE - + compress <- TRUE + recursive <- TRUE nTips <- length(data) if (nTips < 4) return(stree(nTips, tip.label = names(data))) - # New - data <- removeParsimonyUninfomativeSites(data, recursive=TRUE) + data <- removeParsimonyUninfomativeSites(data, recursive=recursive) star_tree <- ifelse(attr(data, "nr") == 0, TRUE, FALSE) add_taxa <- ifelse(is.null(attr(data, "duplicated")), FALSE, TRUE) p0 <- attr(data, "p0") @@ -193,37 +191,31 @@ bab <- function(data, tree = NULL, trace = 0, ...) { # compress sequences (all transitions count equal) data <- compressSites(data) - o <- order(attr(data, "weight"), decreasing = TRUE) data <- subset(data, select = o, site.pattern=TRUE) - tree <- pratchet(data, start = tree, trace = trace - 1, maxit=10, ...) - - data <- subset(data, tree$tip.label) nr <- as.integer(attr(data, "nr")) inord <- getOrder(data) - nTips <- m <- length(data) + data <- data[inord,] + tree <- pratchet(data, start = tree, trace = trace - 1, maxit=10, ...) + p_vec <- fitch(tree, data, "site") - nr <- as.integer(attr(data, "nr")) - TMP <- UB <- matrix(0, m, nr) - for (i in 4:m) { - TMP[i, ] <- lowerBound(subset(data, inord[1:i])) - UB[i, ] <- upperBound(subset(data, inord[1:i])) + nTips <- m <- length(data) + TMP <- matrix(0, m, nr) # UB <- + for (i in 2:m) { + TMP[i, ] <- lowerBound(data[1:i,]) } - dat_used <- subset(data, inord) - weight <- as.double(attr(data, "weight")) m <- nr * (2L * nTips - 2L) - + # Single column discrepancy mmsAmb <- TMP %*% weight -# mmsAmb <- mmsAmb[nTips] - mmsAmb - mms0 <- 0 - if (pBound) mms0 <- pBound(dat_used, UB, TMP) - mms0 <- mms0 + mmsAmb + # mmsAmb <- mmsAmb[nTips] - mmsAmb + mms0 <- mms1 <- 0 + if (ILBound) mms1 <- ilb(data, TMP) + mms0 <- mms1 + mmsAmb mms0 <- mms0[nTips] - mms0 - mms0 <- c(mms0, 0) f <- init_fitch(data, m=4L) @@ -233,7 +225,7 @@ bab <- function(data, tree = NULL, trace = 0, ...) { if (trace > 1) print(paste("upper bound:", bound + p0)) startTree <- structure(list(edge = structure(c(rep(nTips + 1L, 3), - as.integer(inord)[1:3]), .Dim = c(3L, 2L)), tip.label = tree$tip.label, + as.integer(1:3)), .Dim = c(3L, 2L)), tip.label = names(data), Nnode = 1L), .Names = c("edge", "tip.label", "Nnode"), class = "phylo", order = "postorder") @@ -266,15 +258,15 @@ bab <- function(data, tree = NULL, trace = 0, ...) { edge <- tmpTree[, 2] + 2 * nTips f$prep_spr(tmpTree) - score <- f$pscore_vec(edge, as.integer(inord[a + 1L])) + score <- f$pscore_vec(edge, as.integer(a + 1L)) score <- score + blub + mms0[a + 1L] ms <- min(score) if (ms < bound + .1) { if ((a + 1L) < nTips) { ind <- (1:L[a])[score <= bound] trees[[a + 1]][seq_along(ind)] <- .Call('AddOnes', tmpTree, - as.integer(inord[a + 1L]), as.integer(ind), as.integer(L[a]), - as.integer(M[a])) + as.integer(a + 1L), as.integer(ind), as.integer(L[a]), + as.integer(M[a])) l <- length(ind) # os <- order(score[ind], decreasing=TRUE) os <- seq_len(l) @@ -288,8 +280,8 @@ bab <- function(data, tree = NULL, trace = 0, ...) { ind <- which(score == ms) tmp <- vector("list", length(ind)) tmp[seq_along(ind)] <- .Call('AddOnes', tmpTree, - as.integer(inord[a + 1L]), as.integer(ind), - as.integer(L[a]), as.integer(M[a])) + as.integer(a + 1L), as.integer(ind), + as.integer(L[a]), as.integer(M[a])) if (ms < bound) { bound <- ms if (trace) cat("upper bound:", bound + p0, "\n") @@ -303,12 +295,11 @@ bab <- function(data, tree = NULL, trace = 0, ...) { } for (i in seq_along(result)) { result[[i]] <- structure(list(edge = result[[i]], Nnode = nTips - 2L), - .Names = c("edge", "Nnode"), class = "phylo", order = "postorder") + .Names = c("edge", "Nnode"), class = "phylo", order = "postorder") } attr(result, "TipLabel") <- tree$tip.label - attr(result, "visited") <- visited class(result) <- "multiPhylo" if(add_taxa) result <- addTaxa(result, attr(data, "duplicated")) + attr(result, "visited") <- visited return(result) } - From ed29611cff1bae6954bf73b331d93a2bb17b637d Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 18 Mar 2024 06:51:25 +0100 Subject: [PATCH 127/216] update man page --- man/bab.Rd | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/man/bab.Rd b/man/bab.Rd index 4f90ab72..923d0cf2 100644 --- a/man/bab.Rd +++ b/man/bab.Rd @@ -5,7 +5,7 @@ \alias{BranchAndBound} \title{Branch and bound for finding all most parsimonious trees} \usage{ -bab(data, tree = NULL, trace = 0, ILBound = TRUE, ...) +bab(data, tree = NULL, trace = 0, ...) } \arguments{ \item{data}{an object of class phyDat.} @@ -15,9 +15,6 @@ pratchet search is performed.} \item{trace}{defines how much information is printed during optimization.} -\item{ILBound}{compute incompatibility lower bound (default TRUE) of -Holland (2005).} - \item{\dots}{Further arguments passed to or from other methods} } \value{ From 20507bfb9b4820729ae796bd72f383e9debb9073 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 26 Mar 2024 10:53:11 +0100 Subject: [PATCH 128/216] small improvement --- vignettes/AdvancedFeatures.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/AdvancedFeatures.Rmd b/vignettes/AdvancedFeatures.Rmd index 3e73e9c7..0d94ecec 100644 --- a/vignettes/AdvancedFeatures.Rmd +++ b/vignettes/AdvancedFeatures.Rmd @@ -73,7 +73,7 @@ contrast gapsdata3 <- phyDat(data, type="USER", contrast=contrast) gapsdata3 ``` -Here we defined "n" as a state which can be any nucleotide, but not a gap. "-" and "?" can be any state including a gap. +Here we defined "n" as a state which can be any nucleotide, but not a gap. And "?" can be any state including a gap. These data can be used in all functions available in _phangorn_ to compute distance matrices or perform parsimony and maximum likelihood analysis. From 20f628242981af729fa78ccae814e5361219cec9 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 3 Apr 2024 15:27:36 +0200 Subject: [PATCH 129/216] get rid of note --- NAMESPACE | 1 + R/baseFreq.R | 2 +- man/baseFreq.Rd | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 05a28e27..b2069a96 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -303,6 +303,7 @@ importFrom(stats,binomial) importFrom(stats,chisq.test) importFrom(stats,constrOptim) importFrom(stats,cophenetic) +importFrom(stats,cor) importFrom(stats,dbinom) importFrom(stats,dgamma) importFrom(stats,ecdf) diff --git a/R/baseFreq.R b/R/baseFreq.R index 1522cf73..8e8629d7 100644 --- a/R/baseFreq.R +++ b/R/baseFreq.R @@ -3,7 +3,7 @@ #' \code{baseFreq} computes the frequencies (absolute or relative) of the states #' from a sample of sequences. #' \code{glance} computes some useful information about the alignment. -#' \code{composition_test} computes a \eqn{\Chi^2}-test testing if the state +#' \code{composition\_test} computes a \eqn{\chi^2}-test testing if the state #' composition for a species differs. #' #' @param obj,x as object of class phyDat diff --git a/man/baseFreq.Rd b/man/baseFreq.Rd index c071344a..5ba3339e 100644 --- a/man/baseFreq.Rd +++ b/man/baseFreq.Rd @@ -34,7 +34,7 @@ contrast.} \code{baseFreq} computes the frequencies (absolute or relative) of the states from a sample of sequences. \code{glance} computes some useful information about the alignment. -\code{composition_test} computes a \eqn{\Chi^2}-test testing if the state +\code{composition\_test} computes a \eqn{\chi^2}-test testing if the state composition for a species differs. } \examples{ From a43c1366754e664d195ff2b67ffa1e898fb36e6a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 3 Apr 2024 15:29:33 +0200 Subject: [PATCH 130/216] fix ape link and add r-universe link --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a0accbb8..406c5ffe 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ phangorn is a package for phylogenetic reconstruction and analysis in the R lang You can install - the latest released version `install.packages("phangorn")` - the latest development version `remotes::install_github("KlausVigo/phangorn")` +- [r-universe](https://r-universe.dev/) kindly provides binaries for Windows, +Linux and OS X of the development version [here](https://klausvigo.r-universe.dev/phangorn#). To install the development version you may need to install the Biostrings and seqLogo package from bioconductor first: ``` @@ -20,7 +22,8 @@ if (!requireNamespace("BiocManager", quietly = TRUE)) BiocManager::install(c("Biostrings", "seqLogo")) ``` Also the development version usually depends on the latest ape development -version and information to download can be found [here](http://ape-package.ird.fr/ape_installation.html). +version and information to download can be found +[here](https://emmanuelparadis.github.io/ape_installation.html). Additionally you may need to install on windows [Rtools](https://cran.r-project.org/bin/windows/Rtools/) and on mac [XCode](https://developer.apple.com/xcode/) and [GFortran](https://gcc.gnu.org/wiki/GFortranBinaries). From 885dda5480993bcd97bcd2c0411df0e933ca76de Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 3 Apr 2024 15:46:35 +0200 Subject: [PATCH 131/216] small improvement and new check functions --- R/candidate_tree.R | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/R/candidate_tree.R b/R/candidate_tree.R index 754725c3..6d186b4c 100644 --- a/R/candidate_tree.R +++ b/R/candidate_tree.R @@ -24,6 +24,7 @@ minEdge <- function(tree, tau=1e-8, enforce_ultrametric=FALSE){ #' @rdname phangorn-internal +#' @importFrom stats cor #' @export candidate_tree <- function(x, method=c("unrooted", "ultrametric", "tipdated"), eps = 1e-8, tip.dates=NULL, ...){ @@ -35,7 +36,8 @@ candidate_tree <- function(x, method=c("unrooted", "ultrametric", "tipdated"), enforce_ultrametric <- TRUE } if(method=="unrooted"){ - tree <- pratchet(x, maxit=5L, trace=0, perturbation = "stochastic") + tree <- pratchet(x, maxit=5L, trace=0, perturbation = "stochastic", + all=FALSE) tree <- multi2di(tree) tree <- unroot(tree) tree <- acctran(tree, x) @@ -49,3 +51,23 @@ candidate_tree <- function(x, method=c("unrooted", "ultrametric", "tipdated"), } minEdge(tree, tau=eps, enforce_ultrametric=enforce_ultrametric) } + + +# like is.ultrametric +check_tip_dates <- function(tree, tip.dates){ + tip.dates <- tip.dates[tree$tip.label] + nh <- node.depth.edgelength(tree)[seq_along(tree$tip.label)] + isTRUE(all.equal(cor(tip.dates,nh), 1)) +} + + +proper_tree <- function(x, tree, method=c("ultrametric", "tipdated"), + tip.dates=NULL, eps = 1e-8, ...){ + method <- match.arg(method, c("ultrametric", "tipdated")) + if(!is.null(tree$edge.length)){ + if(method=="ultrametric" && is.ultrametric(tree)) return(tree) + if(method=="tipdated" && check_tip_dates(tree, tip.dates)) return(tree) + } + dm <- dist.ml(x, ...) + nnls.tree(dm, tree, method) +} From 97a3925d95461f2e0b4c297be301984b9f4af340 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 3 Apr 2024 16:01:44 +0200 Subject: [PATCH 132/216] make it more robust --- R/bab.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/bab.R b/R/bab.R index 85aba682..2b2a4801 100644 --- a/R/bab.R +++ b/R/bab.R @@ -197,7 +197,8 @@ bab <- function(data, tree = NULL, trace = 0, ...) { nr <- as.integer(attr(data, "nr")) inord <- getOrder(data) data <- data[inord,] - tree <- pratchet(data, start = tree, trace = trace - 1, maxit=10, ...) + tree <- pratchet(data, start = tree, trace = trace - 1, maxit=10, + all=FALSE, ...) p_vec <- fitch(tree, data, "site") nTips <- m <- length(data) From 0dec81b27105e1a232c07cbbf8ede6946844d16b Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 3 Apr 2024 16:15:35 +0200 Subject: [PATCH 133/216] replace warning by message --- R/treedist.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/treedist.R b/R/treedist.R index 462047e2..5b4d2e60 100644 --- a/R/treedist.R +++ b/R/treedist.R @@ -406,7 +406,7 @@ wRF2 <- function(tree, trees, normalize = FALSE, check.labels = TRUE, trees <- .uncompressTipLabel(trees) if (rooted && any(!is.rooted(trees))) { - warning("some trees were rooted, unrooted all") + message("some trees were rooted, unrooted all") rooted <- FALSE } @@ -460,7 +460,7 @@ wRF1 <- function(trees, normalize = FALSE, check.labels = TRUE, trees <- .uncompressTipLabel(trees) if (rooted && any(!is.rooted(trees))) { - warning("some trees were rooted, unrooted all") + message("some trees were rooted, unrooted all") rooted <- FALSE } if (!rooted) { @@ -518,7 +518,7 @@ mRF2 <- function(tree, trees, normalize = FALSE, check.labels = TRUE, if (has.singles(tree)) tree <- collapse.singles(tree) if (!rooted && any(is.rooted(trees))) { - warning("some trees were rooted, unrooted all") + message("some trees were rooted, unrooted all") trees <- unroot(trees) } if (!rooted && is.rooted(tree)) tree <- unroot(tree) @@ -560,7 +560,7 @@ mRF <- function(trees, normalize = FALSE, rooted = FALSE) { RF <- numeric( (l * (l - 1)) / 2) if (rooted && any(!is.rooted(trees))) { - warning("some trees were rooted, unrooted all") + message("some trees were rooted, unrooted all") rooted <- FALSE } if (!rooted) { @@ -722,7 +722,7 @@ kf1 <- function(tree, trees, check.labels = TRUE, rooted = FALSE) { if (has.singles(tree)) tree <- collapse.singles(tree) if (rooted && any(!is.rooted(trees))) { - warning("some trees were rooted, unrooted all") + message("some trees were rooted, unrooted all") rooted <- FALSE } if (!rooted) { @@ -770,7 +770,7 @@ kf2 <- function(trees, check.labels = TRUE, rooted = FALSE) { nTips <- length(trees[[1]]$tip.label) if (rooted && any(!is.rooted(trees))) { - warning("some trees were rooted, unrooted all") + message("some trees were rooted, unrooted all") rooted <- FALSE } if (!rooted && any(is.rooted(trees))) { From b1af5512ce4bf97e503b26e09c03fcc4e738b7ff Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 3 Apr 2024 17:06:33 +0200 Subject: [PATCH 134/216] add example --- R/networx.R | 1 + man/plot.networx.Rd | 1 + 2 files changed, 2 insertions(+) diff --git a/R/networx.R b/R/networx.R index 887cdcb9..8a1aadc9 100644 --- a/R/networx.R +++ b/R/networx.R @@ -673,6 +673,7 @@ rotate_matrix <- function(x, theta){ #' sp <- as.splits(rNNI(tree1, n=10)) #' net <- as.networx(sp) #' plot(net) +#' plot(net, direction="axial") #' \dontrun{ #' # also see example in consensusNet #' example(consensusNet) diff --git a/man/plot.networx.Rd b/man/plot.networx.Rd index 1ae7beb3..e29fb925 100644 --- a/man/plot.networx.Rd +++ b/man/plot.networx.Rd @@ -101,6 +101,7 @@ tree1 <- rtree(20, rooted=FALSE) sp <- as.splits(rNNI(tree1, n=10)) net <- as.networx(sp) plot(net) +plot(net, direction="axial") \dontrun{ # also see example in consensusNet example(consensusNet) From 20495f3ba8f58e924d92017010f3e5cf69b9c91b Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 3 Apr 2024 17:26:02 +0200 Subject: [PATCH 135/216] clean up --- R/gap_as_state.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/gap_as_state.R b/R/gap_as_state.R index aa346256..f0ea845f 100644 --- a/R/gap_as_state.R +++ b/R/gap_as_state.R @@ -96,5 +96,3 @@ remove_similar <- function(x, k=3, index=FALSE){ if(index) return(dist_i) x[-dist_i] } - - From 8c6bb76511eab9564db0c22ef81d7bd0cc11fd65 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 3 Apr 2024 17:39:42 +0200 Subject: [PATCH 136/216] new function dna2aa --- NAMESPACE | 1 + R/codon.R | 18 ++++++++++++++---- man/dna2codon.Rd | 6 +++++- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index b2069a96..401d4ceb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -149,6 +149,7 @@ export(dist.p) export(distanceHadamard) export(distinct.splits) export(diversity) +export(dna2aa) export(dna2codon) export(edQt) export(fhm) diff --git a/R/codon.R b/R/codon.R index 12a011c3..00856360 100644 --- a/R/codon.R +++ b/R/codon.R @@ -1,7 +1,8 @@ #' Translate nucleic acid sequences into codons #' #' The function transforms \code{dna2codon} DNA sequences to codon sequences, -#' \code{codon2dna} transform the other way. +#' \code{codon2dna} transform the other way. \code{dna2codon} translates +#' nucleotide to amino acids using the function \code{\link{trans}}. #' #' The following genetic codes are described here. The number preceding each #' corresponds to the code argument. @@ -58,8 +59,7 @@ #' @export dna2codon <- function(x, codonstart=1, code=1, ambiguity="---", ...){ if(!inherits(x, "phyDat"))stop("x must be of class phyDat") - if(attr(x, "type")=="AA")stop("x must be a nucleotide sequence!") - + if(attr(x, "type")!="DNA")stop("x must be a nucleotide sequence!") if(codonstart>1){ del <- -seq_len(codonstart) x <- subset(x, select=del, site.pattern=FALSE) @@ -76,11 +76,21 @@ dna2codon <- function(x, codonstart=1, code=1, ambiguity="---", ...){ #' @rdname dna2codon #' @export codon2dna <- function(x){ - if(!inherits(x, "phyDat"))stop("x must be of class phyDat") + stopifnot(inherits(x, "phyDat")) phyDat.DNA(as.character(x)) } +#' @rdname dna2codon +#' @export +dna2aa <- function(x, codonstart=1, code=1){ + stopifnot(inherits(x, "phyDat")) + if(attr(x, "type")!="DNA")stop("x must be a nucleotide sequence!") + dna <- as.DNAbin(x) + aa <- as.phyDat(translate(dna, code=code, codonstart=codonstart)) +} + + synonymous_subs <- function(code=1, stop.codon=FALSE){ tmp <- .CODON[, as.character(code)] label <- rownames(.CODON) diff --git a/man/dna2codon.Rd b/man/dna2codon.Rd index e6471bf9..f9077f4c 100644 --- a/man/dna2codon.Rd +++ b/man/dna2codon.Rd @@ -3,11 +3,14 @@ \name{dna2codon} \alias{dna2codon} \alias{codon2dna} +\alias{dna2aa} \title{Translate nucleic acid sequences into codons} \usage{ dna2codon(x, codonstart = 1, code = 1, ambiguity = "---", ...) codon2dna(x) + +dna2aa(x, codonstart = 1, code = 1) } \arguments{ \item{x}{An object containing sequences.} @@ -29,7 +32,8 @@ The functions return an object of class \code{phyDat}. } \description{ The function transforms \code{dna2codon} DNA sequences to codon sequences, -\code{codon2dna} transform the other way. +\code{codon2dna} transform the other way. \code{dna2codon} translates +nucleotide to amino acids using the function \code{\link{trans}}. } \details{ The following genetic codes are described here. The number preceding each From 1a92ed27771ecdd3bd44e0f0fca98d8c857f3fdf Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 4 Apr 2024 16:51:36 +0200 Subject: [PATCH 137/216] less print out --- R/parsimony.R | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/R/parsimony.R b/R/parsimony.R index 6c40abf4..a8b6c109 100644 --- a/R/parsimony.R +++ b/R/parsimony.R @@ -110,7 +110,7 @@ compressSites <- function(data) { ntaxa <- nrow(data) res <- vector("list", ntaxa) for(i in seq_len(ntaxa)) res[[i]] <- data[i, pos] - attrData$weight <- tapply(attrData$weight, index, sum) + attrData$weight <- as.vector(tapply(attrData$weight, index, sum)) attrData$index <- NULL attrData$nr <- length(attrData$weight) attrData$compressed <- TRUE @@ -311,6 +311,7 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, rearrangements = "SPR", perturbation = "ratchet", ...) { if(inherits(data, "DNAbin") || inherits(data, "AAbin")) data <- as.phyDat(data) + printevery <- 10L eps <- 1e-08 trace <- trace - 1 ref <- names(data) @@ -346,10 +347,9 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, return(tree) } - if (perturbation != "random_addition"){ - if(is.null(start)) start <- optim.parsimony(fastme.ols(dist.hamming(data)), - data, trace = trace-1, method = method, - rearrangements = rearrangements, ...) + if(is.null(start)) start <- optim.parsimony(random.addition(data), + data, trace = trace-1, method = method, + rearrangements = rearrangements, ...) tree <- start label <- intersect(tree$tip.label, names(data)) if (!is.binary(tree)){ @@ -361,8 +361,7 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, attr(tree, "pscore") <- parsimony(tree, data, method = method, ...) mp <- attr(tree, "pscore") if (trace >= 0) - print(paste("Best pscore so far:", attr(tree, "pscore"))) - } + cat("Parsimony score of initial tree:", attr(tree, "pscore"), "\n") FUN <- function(data, tree, method, rearrangements, ...) optim.parsimony(tree, data = data, method = method, rearrangements = rearrangements, ...) @@ -371,12 +370,10 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, result <- addTaxa(result, attr(data, "duplicated")) } result <- relabel(result, ref) +# if (trace > 1) cat("optimize topology (NNI): ", pscore, "-->", psc, "\n") hr <- hash(result) on.exit({ if (!all && inherits(result, "multiPhylo")) result <- result[[1]] -# if(!is.null(attr(data, "duplicated"))) -# result <- addTaxa(result, attr(data, "duplicated")) - # else class(result) <- "multiPhylo" if (length(result) == 1) result <- result[[1]] env <- new.env() start_trees <- start_trees[seq_len(i)] @@ -405,22 +402,17 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, bs_ind <- which(bsw > 0) bs_data <- getRows(data, bs_ind) attr(bs_data, "weight") <- bsw[bs_ind] - if(length(bs_ind) > 0)p_trees <- optim.parsimony(tree, bs_data, + if(length(bs_ind) > 0){ + # p_trees <- random.addition(bs_data) # 3 * ?? + p_trees <- optim.parsimony(tree, bs_data, trace = trace, method = method, rearrangements = rearrangements, ...) + } else p_trees <- stree(length(data), tip.label = names(data)) - trees <- optim.parsimony(p_trees, data, trace = trace, - method = method, rearrangements = rearrangements, ...) - } - if (perturbation == "stochastic") { - p_trees <- rNNI(tree, floor(nTips / 2)) - trees <- optim.parsimony(p_trees, data, trace = trace, method = method, - rearrangements = rearrangements, ...) - } - if (perturbation == "random_addition") { - p_trees <- random.addition(data) - trees <- optim.parsimony(p_trees, data, trace = trace, method = method, - rearrangements = rearrangements, ...) } + if (perturbation == "stochastic") p_trees <- rNNI(tree, floor(nTips / 2)) + if (perturbation == "random_addition") p_trees <- random.addition(data) + trees <- optim.parsimony(p_trees, data, trace = trace, method = method, + rearrangements = rearrangements, ...) curr_tree <- trees if(!is.null(attr(data, "duplicated"))){ p_trees <- addTaxa(p_trees, attr(data, "duplicated")) @@ -449,8 +441,9 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, } } } - if (trace >= 0) - print(paste("Best pscore so far:", mp)) + if (trace >= 0 && (!i%%printevery)) + cat("\rIteration: ", i, ". Best parsimony score so far: ", mp, sep="") if ( (kmax >= k) && (i >= minit)) break() } # for + if (trace >= 0)cat("\n") } # pratchet From aa3bc61e679e51a010ec34cb19458343c44d77d2 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 4 Apr 2024 16:52:02 +0200 Subject: [PATCH 138/216] clean up --- R/upgma.R | 24 +++++++++++++----------- man/upgma.Rd | 11 +++-------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/R/upgma.R b/R/upgma.R index b79d02c0..7ac5275d 100644 --- a/R/upgma.R +++ b/R/upgma.R @@ -1,19 +1,19 @@ #' UPGMA, WPGMA and sUPGMA #' #' UPGMA and WPGMA clustering. UPGMA and WPGMA are a wrapper function around -#' \code{\link[stats]{hclust}} retuning a \code{phylo} object. UPGMA -#' additionally performs nearest neighbor interchange (NNI) tree rearrangements -#' to improve the phylogeny (Schliep et al. 2023). +#' \code{\link[stats]{hclust}} returning a \code{phylo} object. +## UPGMA additionally performs nearest neighbor interchange (NNI) tree rearrangements +## to improve the phylogeny (Schliep et al. 2024). #' \code{supgma} perform serial sampled UPGMA similar to Drummond and Rodrigo -#' (2000) however also performing NNI rearrangements. -#' +#' (2000). +## and also performing NNI rearrangements. #' #' @param D A distance matrix. #' @param method The agglomeration method to be used. This should be (an #' unambiguous abbreviation of) one of "ward", "single", "complete", "average", #' "mcquitty", "median" or "centroid". The default is "average". -#' @param NNI logical whether make nearest neighbor rearrangements to improve the -#' tree. Currently only available for \code{method="average"}. +## @param NNI logical whether make nearest neighbor rearrangements to improve the +## tree. Currently only available for \code{method="average"}. #' @param trace Show output during optimization (see details). #' @param tip.dates A named vector of sampling times associated to the tips. #' @param \dots Further arguments passed to or from other methods. @@ -42,12 +42,13 @@ #' #' @rdname upgma #' @export -upgma <- function(D, method = "average", NNI=FALSE, ...) { +upgma <- function(D, method = "average", ...) { + method <- match.arg(method, c("average", "ward.D", "ward.D2", "single", + "complete", "average", "mcquitty", "median", "centroid")) DD <- as.dist(D) hc <- hclust(DD, method = method) result <- as.phylo(hc) -# if(NNI){ -# stopifnot(method=="average") +# if(NNI && method=="average"){ # result <- upgma_nni(DD, tree=result, ...) # } result <- reorder(result, "postorder") @@ -58,6 +59,8 @@ upgma <- function(D, method = "average", NNI=FALSE, ...) { #' @rdname upgma #' @export wpgma <- function(D, method = "mcquitty", ...) { + method <- match.arg(method, c("average", "ward.D", "ward.D2", "single", + "complete", "average", "mcquitty", "median", "centroid")) DD <- as.dist(D) hc <- hclust(DD, method = method, ...) result <- as.phylo(hc) @@ -66,7 +69,6 @@ wpgma <- function(D, method = "mcquitty", ...) { } - #' @rdname upgma #' @export supgma <- function(D, tip.dates, trace=0){ diff --git a/man/upgma.Rd b/man/upgma.Rd index 038e6dbe..ddf6be05 100644 --- a/man/upgma.Rd +++ b/man/upgma.Rd @@ -6,7 +6,7 @@ \alias{supgma} \title{UPGMA, WPGMA and sUPGMA} \usage{ -upgma(D, method = "average", NNI = FALSE, ...) +upgma(D, method = "average", ...) wpgma(D, method = "mcquitty", ...) @@ -19,9 +19,6 @@ supgma(D, tip.dates, trace = 0) unambiguous abbreviation of) one of "ward", "single", "complete", "average", "mcquitty", "median" or "centroid". The default is "average".} -\item{NNI}{logical whether make nearest neighbor rearrangements to improve the -tree. Currently only available for \code{method="average"}.} - \item{\dots}{Further arguments passed to or from other methods.} \item{tip.dates}{A named vector of sampling times associated to the tips.} @@ -33,11 +30,9 @@ A phylogenetic tree of class \code{phylo}. } \description{ UPGMA and WPGMA clustering. UPGMA and WPGMA are a wrapper function around -\code{\link[stats]{hclust}} retuning a \code{phylo} object. UPGMA -additionally performs nearest neighbor interchange (NNI) tree rearrangements -to improve the phylogeny (Schliep et al. 2023). +\code{\link[stats]{hclust}} returning a \code{phylo} object. \code{supgma} perform serial sampled UPGMA similar to Drummond and Rodrigo -(2000) however also performing NNI rearrangements. +(2000). } \examples{ From e1a366020f5561481805205f01178b2dc8eba8f0 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 4 Apr 2024 16:53:52 +0200 Subject: [PATCH 139/216] ML support for gap as its own state --- R/phylo.R | 50 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/R/phylo.R b/R/phylo.R index 2d24ba54..396d0ea3 100644 --- a/R/phylo.R +++ b/R/phylo.R @@ -68,9 +68,9 @@ optimCodon <- function(tree, data, Q, subs, syn, trace = 0L, ab = c(0, 0), } -subsChoice <- function(type = .dnamodels) { +subsChoice <- function(type = .dnamodels, has_gap_state=FALSE) { type <- match.arg(type) - switch(type, + res <- switch(type, JC = list(optQ = FALSE, optBf = FALSE, subs = c(0, 0, 0, 0, 0, 0)), F81 = list(optQ = FALSE, optBf = TRUE, subs = c(0, 0, 0, 0, 0, 0)), K80 = list(optQ = TRUE, optBf = FALSE, subs = c(0, 1, 0, 0, 1, 0)), @@ -95,6 +95,14 @@ subsChoice <- function(type = .dnamodels) { SYM = list(optQ = TRUE, optBf = FALSE, subs = c(1, 2, 3, 4, 5, 0)), GTR = list(optQ = TRUE, optBf = TRUE, subs = c(1, 2, 3, 4, 5, 0)) ) + if(has_gap_state){ + tmp <- matrix(0,4,4) + tmp[lower.tri(tmp)] <- res$subs + tmp <- rbind(tmp, max(tmp) + 1) + res$subs <- tmp[lower.tri(tmp)] + res$optQ <- TRUE + } + res } @@ -133,7 +141,7 @@ optimInv <- function(tree, data, inv = 0.01, INV, ...) { weight <- as.double(attr(data, "weight")) tmp <- as.vector( INV %*% rep(1, attr(data, "nc")) ) ind <- which(tmp > 0) - max_inv <- sum(weight[ind]) / (sum(weight) + 1e-8) + max_inv <- sum(weight[ind]) / sum(weight) fn <- function(inv, tree, data, ...) pml.fit(tree, data, inv = inv, INV = INV, ll.0 = NULL, ...) res <- optimize(f = fn, interval = c(0, max_inv), lower = 0, upper = max_inv, @@ -629,9 +637,13 @@ readAArate <- function(file) { # save(.LG,.WAG,.Dayhoff,.JTT,.cpREV,.mtmam,.mtArt, file = "sysdata2.rda") -getModelAA <- function(model, bf = TRUE, Q = TRUE) { +getModelAA <- function(model, bf = TRUE, Q = TRUE, has_gap_state=FALSE) { model <- match.arg(eval(model), .aamodels) tmp <- get(paste(".", model, sep = ""), environment(pml)) + if(has_gap_state){ + tmp$Q <- add_gap_Q_AA(tmp$Q) + tmp$bf <- add_gap_bf_AA(tmp$bf) + } if (Q) assign("Q", tmp$Q, envir = parent.frame()) if (bf) assign("bf", tmp$bf, envir = parent.frame()) } @@ -797,7 +809,8 @@ update.pml <- function(object, ...) { if (!is.na(existing[9])) { model <- match.arg(eval(extras[[existing[9]]], parent.frame()), .aamodels) - getModelAA(model, bf = is.na(existing[3]), Q = is.na(existing[4])) + getModelAA(model, bf = is.na(existing[3]), Q = is.na(existing[4]), + has_gap_state = has_gap_state(data)) updateEig <- TRUE } else model <- object$model @@ -1358,7 +1371,8 @@ pml <- function(tree, data, bf = NULL, Q = NULL, inv = 0, k = 1, shape = 1, nc <- as.integer(attr(data, "nc")) if (type == "AA" & !is.null(model)) { model <- match.arg(model, .aamodels) - getModelAA(model, bf = is.null(bf), Q = is.null(Q)) + getModelAA(model, bf = is.null(bf), Q = is.null(Q), + has_gap_state = has_gap_state(data)) } if (type == "CODON") { .syn <- synonymous_subs(code=attr(data, "code")) @@ -1370,8 +1384,12 @@ pml <- function(tree, data, bf = NULL, Q = NULL, inv = 0, k = 1, shape = 1, model <- match.arg(model, .usermodels) if(model=="ORDERED") Q <- subsChoice_USER("ORDERED", nc)$Q } - if (is.null(bf)) - bf <- rep(1 / length(levels), length(levels)) + if (is.null(bf)){ + if(has_gap_state(data)){ + bf <- baseFreq(data) + bf[-nc] <- (1 - bf[nc]) / (nc-1) + } else bf <- rep(1 / nc, nc) + } if (is.character(bf)) { bf_choice <- match.arg(bf, c("equal", "empirical", "F1x4", "F3x4", "F61")) if (bf_choice == "F3x4" & type != "CODON") @@ -2014,7 +2032,7 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, ASC <- object$ASC site.rate <- object$site.rate optFreeRate <- FALSE - if(site.rate=="free_rate"){ + if(site.rate=="free_rate"){subsC if(optGamma){ optFreeRate <- TRUE optGamma <- FALSE @@ -2038,7 +2056,6 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, addTaxa <- FALSE trace <- control$trace tau <- control$tau -# mit Zeile 2000 vereinheitlichen method <- "unrooted" is_ultrametric <- FALSE timetree <- FALSE @@ -2125,7 +2142,7 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, nr <- as.integer(attr(data, "nr")) nc <- as.integer(attr(data, "nc")) if (type == "DNA" & optModel) { - tmp <- subsChoice(model) + tmp <- subsChoice(model, has_gap_state(data)) optQ <- tmp$optQ if (!optQ) { Q <- rep(1, 6) @@ -2133,7 +2150,11 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, } optBf <- tmp$optBf if (!optBf){ - bf <- c(0.25, 0.25, 0.25, 0.25) + if(has_gap_state(data)){ + bf <- baseFreq(data) + bf[-nc] <- (1 - bf[nc]) / (nc-1) + } else bf <- rep(1 / nc, nc) + #bf <- c(0.25, 0.25, 0.25, 0.25) } else bf <- baseFreq(data) object <- update.pml(object, bf = bf) subs <- tmp$subs @@ -2430,12 +2451,11 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, ratchet_fun <- function(tree, data, ...){ weight <- attr(data, "weight") v <- rep(seq_along(weight), weight) - attr(data, "weight") <- tabulate(sample(v, replace = TRUE), length(weight)) - + attr(data, "weight") <- tabulate(sample(v, replace = TRUE), + length(weight)) res <- opt_Edge(tree, data, ...) ll2 <- res[[2]] opt_nni(tree, data, iter_max=5, trace=0, ll=ll2, ...)$tree -# opt_nni(tree, data, ...)$tree } for(i in seq_len(maxit)){ if(rearrangement == "stochastic"){ From cd1adca0e608d7165abb3dd49e3cd2ccc73d1501 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 5 Apr 2024 00:25:59 +0200 Subject: [PATCH 140/216] bugfix --- R/phylo.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/phylo.R b/R/phylo.R index 396d0ea3..295f88c9 100644 --- a/R/phylo.R +++ b/R/phylo.R @@ -2032,7 +2032,7 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, ASC <- object$ASC site.rate <- object$site.rate optFreeRate <- FALSE - if(site.rate=="free_rate"){subsC + if(site.rate=="free_rate"){ if(optGamma){ optFreeRate <- TRUE optGamma <- FALSE From fa10b181b3b0b5693372c5e63b2715bff183fe4a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 5 Apr 2024 00:32:24 +0200 Subject: [PATCH 141/216] small improvements --- R/Densi.R | 2 +- R/networx.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Densi.R b/R/Densi.R index 525bf9c6..3b84f318 100644 --- a/R/Densi.R +++ b/R/Densi.R @@ -171,7 +171,7 @@ densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), maxBT <- max(maxBT, max(tip.dates) - min(label)) at <- 1 - (max(tip.dates) - label) / maxBT if(direction=="leftwards" || direction=="downwards") at <- at + 1- max(at) - scaleX=FALSE + scaleX <- FALSE } else { if (scaleX) maxBT <- 1.0 diff --git a/R/networx.R b/R/networx.R index 8a1aadc9..a147a1f6 100644 --- a/R/networx.R +++ b/R/networx.R @@ -899,7 +899,7 @@ plot2D <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, font <- rep(font, length.out = nTips) tip.color <- rep(tip.color, length.out = nTips) cex <- rep(cex, length.out = nTips) - for (i in 1:length(label)) + for (i in seq_along(label)) text(xx[i], yy[i], label[i], font = font[i], cex = cex[i], srt = angle[i], adj = adj[i], col = tip.color[i]) From 4cbd17ece46c3363424c403384120a9c2b7d4e7f Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 5 Apr 2024 09:35:22 +0200 Subject: [PATCH 142/216] bug fix --- R/codon.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/codon.R b/R/codon.R index 00856360..64769a3e 100644 --- a/R/codon.R +++ b/R/codon.R @@ -87,7 +87,7 @@ dna2aa <- function(x, codonstart=1, code=1){ stopifnot(inherits(x, "phyDat")) if(attr(x, "type")!="DNA")stop("x must be a nucleotide sequence!") dna <- as.DNAbin(x) - aa <- as.phyDat(translate(dna, code=code, codonstart=codonstart)) + aa <- as.phyDat(trans(dna, code=code, codonstart=codonstart)) } From 2d56a50e31907c5c4851285e3f2a8b0ca1334011 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 17 Apr 2024 15:23:38 +0200 Subject: [PATCH 143/216] use ggseqlogo instead of seqLogo, update ape to >=5.8 --- DESCRIPTION | 6 +++--- vignettes/phangorn.bib | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cbee4437..55c04cf9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: phangorn Title: Phylogenetic Reconstruction and Analysis -Version: 3.0.0.0 +Version: 3.0.0.1 Authors@R: c(person("Klaus", "Schliep", role = c("aut", "cre"), email = "klaus.schliep@gmail.com", @@ -38,12 +38,13 @@ URL: https://github.com/KlausVigo/phangorn, https://klausvigo.github.io/phangorn/ BugReports: https://github.com/KlausVigo/phangorn/issues Depends: - ape (>= 5.7), + ape (>= 5.8), R (>= 4.1.0) Imports: digest, fastmatch, generics, + ggseqlogo, graphics, grDevices, igraph (>= 1.0), @@ -62,7 +63,6 @@ Suggests: rgl, rmarkdown, seqinr, - seqLogo, tinytest, xtable LinkingTo: diff --git a/vignettes/phangorn.bib b/vignettes/phangorn.bib index 4cfd0aa6..c544019b 100644 --- a/vignettes/phangorn.bib +++ b/vignettes/phangorn.bib @@ -1366,3 +1366,12 @@ @article{Koshi1996 doi = {10.1007/BF02198858}, } + +@Manual{ggseqlogo, + title = {ggseqlogo: A 'ggplot2' Extension for Drawing Publication-Ready Sequence +Logos}, + author = {Omar Wagih}, + year = {2024}, + note = {R package version 0.2}, + url = {https://CRAN.R-project.org/package=ggseqlogo}, + } From 76aa52c31a30b26a7637efc3b2ae4fb15c137f73 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 17 Apr 2024 15:28:06 +0200 Subject: [PATCH 144/216] ancestral reconstruction was rewritten --- NAMESPACE | 11 +- R/{ancestral_pml.R => ancestral.R} | 309 ++++++++++++++++---------- R/plotAnc.R | 130 +++++++++-- inst/tinytest/test_ancestral.R | 16 +- man/ancestral.pml.Rd | 59 +++-- man/parsimony.Rd | 2 +- man/{plotAnc.Rd => plot.ancestral.Rd} | 49 +++- man/write.ancestral.Rd | 38 ++++ vignettes/Ancestral.Rmd | 25 +-- 9 files changed, 437 insertions(+), 202 deletions(-) rename R/{ancestral_pml.R => ancestral.R} (54%) rename man/{plotAnc.Rd => plot.ancestral.Rd} (53%) create mode 100644 man/write.ancestral.Rd diff --git a/NAMESPACE b/NAMESPACE index 401d4ceb..fd99ef07 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,7 +15,6 @@ S3method(as.Matrix,splits) S3method(as.MultipleAlignment,phyDat) S3method(as.bitsplits,splits) S3method(as.character,phyDat) -S3method(as.data.frame,ancestral) S3method(as.data.frame,phyDat) S3method(as.matrix,splits) S3method(as.networx,phylo) @@ -26,7 +25,6 @@ S3method(as.phyDat,DNAStringSet) S3method(as.phyDat,DNAbin) S3method(as.phyDat,MultipleAlignment) S3method(as.phyDat,alignment) -S3method(as.phyDat,ancestral) S3method(as.phyDat,character) S3method(as.phyDat,data.frame) S3method(as.phyDat,factor) @@ -50,18 +48,21 @@ S3method(glance,pmlMix) S3method(hash,multiPhylo) S3method(hash,phylo) S3method(identify,networx) +S3method(image,ancestral) S3method(image,phyDat) S3method(logLik,pml) S3method(logLik,pmlMix) S3method(logLik,pmlPart) S3method(midpoint,multiPhylo) S3method(midpoint,phylo) +S3method(plot,ancestral) S3method(plot,codonTest) S3method(plot,networx) S3method(plot,pml) S3method(plot,pmlCluster) S3method(plot,pmlPart) S3method(print,SOWH) +S3method(print,ancestral) S3method(print,codonTest) S3method(print,phyDat) S3method(print,pml) @@ -111,6 +112,7 @@ export(allDescendants) export(allSitePattern) export(allSplits) export(allTrees) +export(ancestral) export(ancestral.pars) export(ancestral.pml) export(as.Matrix) @@ -197,6 +199,7 @@ export(phyDat2alignment) export(plotAnc) export(plotBS) export(plotRates) +export(plotSeqLogo) export(plot_gamma_plus_inv) export(pml) export(pml.control) @@ -239,6 +242,7 @@ export(treedist) export(upgma) export(wRF.dist) export(wpgma) +export(write.ancestral) export(write.nexus.dist) export(write.nexus.networx) export(write.nexus.splits) @@ -257,6 +261,8 @@ importFrom(Matrix,sparseMatrix) importFrom(fastmatch,fmatch) importFrom(generics,glance) importFrom(generics,tidy) +importFrom(ggseqlogo,ggseqlogo) +importFrom(ggseqlogo,make_col_scheme) importFrom(grDevices,adjustcolor) importFrom(grDevices,col2rgb) importFrom(grDevices,hcl.colors) @@ -331,6 +337,7 @@ importFrom(stats,update) importFrom(stats,xtabs) importFrom(utils,combn) importFrom(utils,download.file) +importFrom(utils,head) importFrom(utils,installed.packages) importFrom(utils,packageDescription) importFrom(utils,read.table) diff --git a/R/ancestral_pml.R b/R/ancestral.R similarity index 54% rename from R/ancestral_pml.R rename to R/ancestral.R index 328d9daf..8c51d478 100644 --- a/R/ancestral_pml.R +++ b/R/ancestral.R @@ -3,14 +3,24 @@ #' Marginal reconstruction of the ancestral character states. #' #' The argument "type" defines the criterion to assign the internal nodes. For -#' \code{ancestral.pml} so far "ml" and (empirical) "bayes" and for +#' \code{ancestral.pml} so far "ml and marginal (empirical) "bayes" and for #' \code{ancestral.pars} "MPR" and "ACCTRAN" are possible. #' +#' The function return a list containing the tree with node labels, the original +#' alignment as an \code{phyDat} object, a data.frame containing the +#' probabilities belonging to a state for all (internal nodes) and the most +#' likely state. For parsimony and nucleotide data the most likely state might +#' be ambiguous. For ML this is very unlikely to be the case. +#' +#' If the input tree does not contain unique node labels the function +#' \code{ape::MakeNodeLabel} is used to create them. +#' #' With parsimony reconstruction one has to keep in mind that there will be #' often no unique solution. #' -#' The functions use node labels if these are present and unique. Otherwise the -#' function \code{ape::MakeNodeLabel} is used to create them. +#' The functions use the node labels of the provided tree (also if part of the +#' \code{pml} object) if these are unique. Otherwise the function +#' \code{ape::MakeNodeLabel} is used to create them. #' #' For further details see vignette("Ancestral"). #' @@ -19,15 +29,17 @@ #' @param data an object of class phyDat #' @param type method used to assign characters to internal nodes, see details. #' @param cost A cost matrix for the transitions between two states. -#' @param return return a \code{phyDat} object or matrix of probabilities. -#' @param x an object of class ancestral. +## @param return return a \code{phyDat} object or matrix of probabilities. +## @param x an object of class ancestral. #' @param \dots Further arguments passed to or from other methods. -#' @return An object of class ancestral containing theestimated character -#' states. -#' For \code{return="phyDat"} an object of class "phyDat", containing -#' the ancestral states of all nodes. For nucleotide data this can contain -#' ambiguous states. Apart from fitch parsimony the most likely states are -#' returned. +#' @return An object of class ancestral. This is a list containing the tree with +#' node labels, the original alignment as an \code{phyDat} object, a +#' \code{data.frame} containing the probabilities belonging to a state for all +#' (internal nodes) and the most likely state. +## For \code{return="phyDat"} an object of class "phyDat", containing +## the ancestral states of all nodes. For nucleotide data this can contain +## ambiguous states. Apart from fitch parsimony the most likely states are +## returned. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, #' \code{\link{plotAnc}}, \code{\link[ape]{root}}, @@ -50,21 +62,24 @@ #' fit <- pml(tree, Laurasiatherian) #' anc.ml <- ancestral.pml(fit, type = "ml") #' anc.p <- ancestral.pars(tree, Laurasiatherian) -#' \dontrun{ -#' require(seqLogo) -#' seqLogo( t(subset(anc.ml, 48, 1:20)[[1]]), ic.scale=FALSE) -#' seqLogo( t(subset(anc.p, 48, 1:20)[[1]]), ic.scale=FALSE) -#' } -#' # plot the first site pattern -#' plotAnc(tree, anc.ml, 1) +## \dontrun{ +## require(seqLogo) +## seqLogo( t(subset(anc.ml, 48, 1:20)[[1]]), ic.scale=FALSE) +## seqLogo( t(subset(anc.p, 48, 1:20)[[1]]), ic.scale=FALSE) +## } +#' # plot ancestral sequences at the root +#' plotSeqLogo( anc.ml, 48, 1, 20) +#' plotSeqLogo( anc.p, 48, 1, 20) +#' # plot the first character +#' plotAnc(anc.ml) #' # plot the third character -#' plotAnc(tree, anc.ml, attr(anc.ml, "index")[3]) +#' plotAnc(anc.ml, 3) #' #' @rdname ancestral.pml #' @export -ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { +ancestral.pml <- function(object, type = "marginal", ...) { call <- match.call() - pt <- match.arg(type, c("marginal", "joint", "ml", "bayes")) + pt <- match.arg(type, c("marginal", "ml", "bayes")) # "joint", tree <- object$tree INV <- object$INV inv <- object$inv @@ -76,6 +91,7 @@ ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { nTips <- length(tree$tip.label) node <- tree$edge[, 1] edge <- tree$edge[, 2] + nNode <- Nnode(tree) m <- length(edge) + 1 # max(edge) w <- object$w g <- object$g @@ -83,19 +99,13 @@ ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { nr <- attr(data, "nr") nc <- attr(data, "nc") dat <- vector(mode = "list", length = m * l) - result <- vector(mode = "list", length = m) + result <- vector(mode = "list", length = nNode) + result2 <- vector(mode = "list", length = nNode) dim(dat) <- c(l, m) - - x <- attributes(data) - label <- makeAncNodeLabel(tree, ...) - x[["names"]] <- label + node_label <- makeAncNodeLabel(tree, ...) + tree$node.label <- node_label tmp <- length(data) - if (return != "phyDat") { - result <- new2old.phyDat(data) - } else { - result[1:nTips] <- data - } eig <- object$eig bf <- object$bf @@ -110,11 +120,11 @@ ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { contrast <- attr(data, "contrast") # proper format eps <- 1.0e-5 - attr <- attributes(data) - pos <- match(attr$levels, attr$allLevels) + attrib <- attributes(data) + pos <- match(attrib$levels, attrib$allLevels) nco <- as.integer(dim(contrast)[1]) for (i in 1:l) dat[i, (nTips + 1):m] <- .Call('LogLik2', data, P[i, ], nr, nc, - node, edge, nTips, mNodes, contrast, nco) + node, edge, nTips, mNodes, contrast, nco) parent <- tree$edge[, 1] child <- tree$edge[, 2] nTips <- min(parent) - 1 @@ -137,41 +147,92 @@ ancestral.pml <- function(object, type = "marginal", return = "prob", ...) { if ((pt == "bayes") || (pt == "marginal")) tmp <- tmp * rep(bf, each = nr) tmp <- tmp / rowSums(tmp) - if (return == "phyDat") { - if (data_type == "DNA") { - tmp <- p2dna(tmp) - tmp <- fitchCoding2ambiguous(tmp) - } - else { - tmp <- pos[max.col(tmp)] - } + if (data_type == "DNA") { + tmp_max <- p2dna(tmp) + tmp_max <- fitchCoding2ambiguous(tmp_max) } - result[[j]] <- tmp + else { + tmp_max <- pos[max.col(tmp)] + } + result[[j - nTips]] <- tmp + result2[[j - nTips]] <- tmp_max } ind <- identical_sites(data) if(length(ind)>0){ - for(k in (nTips+1L):m){ - if(return=="prob") result[[k]][ind,] <- result[[1]][ind,] - else result[[k]][ind] <- data[[1]][ind] + for(k in seq_len(nNode)){ + result[[k]][ind,] <- contrast[data[[1]][ind],] + result2[[k]][ind] <- data[[1]][ind] } } - attributes(result) <- x - attr(result, "call") <- call - if(return=="prob") class(result) <- c("ancestral", "phyDat") - result + attrib$names <- node_label + attributes(result2) <- attrib + attributes(result) <- attrib + result <- list2df_ancestral(result, result2) + erg <- list(tree=tree, data=data, prob=result, state=result2) + class(erg) <- "ancestral" + erg +} + + +#' Export and convenience functions for ancestral reconstructions +#' +#' \code{write.ancestral} allows to export ancestral reconstructions. It writes +#' out the tree, a tab delimited text file with the probabilities and the +#' alignment. \code{read.ancestral} tries to read in these files. +#' \code{ancestral} generates an object of call ancestral. +#' +#' This allows also to read in reconstruction made by iqtree to use the +#' plotting capabilities of R. +#' @param x an object of class ancestral. +#' @param file a file name. File endings are added. +#' @param ... Further arguments passed to or from other methods. +#' @rdname write.ancestral +#' @export +write.ancestral <- function(x, file="ancestral"){ + stopifnot(inherits(x, "ancestral")) + write.phyDat(x$data, file=paste0(file, "_align.fasta")) + write.table(x$prob, file=paste0(file, ".state"), quote=FALSE, row.names=FALSE, + sep="\t") + write.tree(x$tree, file=paste0(file, "_tree.nwk")) } +#' @param tree an object of class phylo. +#' @param align an object of class phyDat. +#' @param prob an data.frame containing a matrix of posterior probabilities for +#' each state and site. +#' @importFrom utils head +#' @rdname write.ancestral +#' @export +ancestral <- function(tree, align, prob){ + stopifnot(inherits(tree, "phylo")) + stopifnot(inherits(align, "phyDat")) + stopifnot(inherits(prob, "data.frame")) + erg <- list(tree=tree, data=align, prob=prob) + class(erg) <- "ancestral" + erg +} -#' @rdname ancestral.pml + +#' @rdname write.ancestral #' @export -as.phyDat.ancestral <- function(x, ...) { +print.ancestral <- function(x, ...){ + stopifnot(inherits(x, "ancestral")) + print(x$tree) + print(x$data) + print(head(x$prob)) +} + +## @rdname ancestral.pml +## @export +#as.phyDat.ancestral <- function(x, ...) { +highest_state <- function(x, ...) { type <- attr(x, "type") fun2 <- function(x) { x <- p2dna(x) fitchCoding2ambiguous(x) } - if (type == "DNA") { + if (type == "DNA" && !has_gap_state(x)) { res <- lapply(x, fun2) } else { @@ -187,10 +248,8 @@ as.phyDat.ancestral <- function(x, ...) { } -#' @rdname ancestral.pml -#' @export -as.data.frame.ancestral <- function(x, ...) { - stopifnot(inherits(x, "ancestral")) +list2df_ancestral <- function(x, y=NULL, ...) { +# stopifnot(inherits(x, "ancestral")) l <- length(x) nr <- attr(x, "nr") nc <- attr(x, "nc") @@ -203,8 +262,16 @@ as.data.frame.ancestral <- function(x, ...) { X[(j+1):(j+nr), ] <- x[[i]][index, ] j <- j + nr } - res <- data.frame(Site=rep(seq_len(nr), l), Node=rep(nam, each=nr), X) - colnames(res) <- c("Site", "Node", attr(x, "levels")) + if(!is.null(y) & inherits(y, "phyDat")){ + y <- y[names(x)] + Y <- unlist(as.data.frame(y)) + res <- data.frame(Node=rep(nam, each=nr), Site=rep(seq_len(nr), l), Y, X) + colnames(res) <- c("Node", "Site", "State", paste0("p_", attr(x, "levels"))) + } else{ + res <- data.frame(Node=rep(nam, each=nr), Site=rep(seq_len(nr), l), X) + colnames(res) <- c("Node", "Site", paste0("p_", attr(x, "levels"))) + } + rownames(res) <- NULL res } @@ -219,26 +286,39 @@ fitchCoding2ambiguous <- function(x, type = "DNA") { #' @rdname ancestral.pml #' @export ancestral.pars <- function(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), - cost = NULL, return = "prob", ...) { + cost = NULL, ...) { + #, return = "prob" call <- match.call() type <- match.arg(type) + tree$node.label <- makeAncNodeLabel(tree, ...) + contrast <- attr(data, "contrast") + data <- data[tree$tip.label,] if (type == "ACCTRAN" || type=="POSTORDER") { - res <- ptree(tree, data, return = return, acctran=(type == "ACCTRAN")) - attr(res, "call") <- call + #, return = return + res <- ptree(tree, data, acctran=(type == "ACCTRAN")) +# attr(res, "call") <- call } if (type == "MPR") { - res <- mpr(tree, data, cost = cost, return = return) - attr(res, "call") <- call + res <- mpr(tree, data, cost = cost) + #, return = return +# attr(res, "call") <- call } - data <- data[tree$tip.label,] + result <- res[[1]] + result2 <- res[[2]] ind <- identical_sites(data) if(length(ind)>0){ - for(k in (Ntip(tree)+1L):length(res)){ - if(return=="prob") res[[k]][ind,] <- res[[1]][ind,] - else res[[k]][ind] <- data[[1]][ind] + for(k in seq_len(Nnode(tree))){ + result[[k]][ind,] <- contrast[data[[1]][ind],] + result2[[k]][ind] <- data[[1]][ind] } } - res +# attrib$names <- node_label +# attributes(result2) <- attrib +# attributes(result) <- attrib + result <- list2df_ancestral(result, result2) + erg <- list(tree=tree, data=data, prob=result, state=result2) + class(erg) <- "ancestral" + erg } @@ -271,18 +351,18 @@ mpr.help <- function(tree, data, cost = NULL) { res } - -mpr <- function(tree, data, cost = NULL, return = "prob", ...) { +# , return = "prob" +mpr <- function(tree, data, cost = NULL, ...) { data <- subset(data, tree$tip.label) att <- attributes(data) + att$names <- tree$node.label type <- att$type nr <- att$nr nc <- att$nc res <- mpr.help(tree, data, cost) l <- length(tree$tip.label) m <- length(res) - label <- makeAncNodeLabel(tree, ...) - att[["names"]] <- label + nNode <- Nnode(tree) ntips <- length(tree$tip.label) contrast <- att$contrast eps <- 5e-6 @@ -293,31 +373,33 @@ mpr <- function(tree, data, cost = NULL, return = "prob", ...) { rs <- rowSums(X) # apply(X, 1, sum) X / rs } - for (i in 1:ntips) res[[i]] <- contrast[data[[i]], , drop = FALSE] +# for (i in 1:ntips) res[[i]] <- contrast[data[[i]], , drop = FALSE] for (i in (ntips + 1):m) res[[i]][] <- as.numeric(res[[i]] < RM) - if (return == "prob") { + res <- res[(ntips + 1):m] +# if (return == "prob") { # for(i in 1:ntips) res[[i]] <- contrast[data[[i]],,drop=FALSE] - res <- lapply(res, fun) - attributes(res) <- att - class(res) <- c("ancestral", "phyDat") - } + res_prob <- lapply(res, fun) + attributes(res_prob) <- att + class(res_prob) <- c("ancestral", "phyDat") +# } # else res[1:ntips] <- data[1:ntips] fun2 <- function(x) { x <- p2dna(x) fitchCoding2ambiguous(x) } - if (return != "prob") { - if (type == "DNA") { - res <- lapply(res, fun2) - attributes(res) <- att - } - else { - attributes(res) <- att - res <- as.phyDat.ancestral(res) - } - res[1:ntips] <- data +# if (return != "prob") { + if (type == "DNA") { + res_state <- lapply(res, fun2) + attributes(res_state) <- att } - res + else { + attributes(res) <- att + res_state <- highest_state(res) + attributes(res_state) <- att + } +# res[1:ntips] <- data +# } + list(res_prob, res_state) } @@ -365,59 +447,62 @@ acctran <- function(tree, data) { acctran2(tree, data) } - -ptree <- function(tree, data, return = "prob", acctran=TRUE, ...) { +#, return = "prob" +ptree <- function(tree, data, acctran=TRUE, ...) { tree <- reorder(tree, "postorder") data <- subset(data, tree$tip.label) edge <- tree$edge att <- attributes(data) + att$names <- tree$node.label nr <- att$nr type <- att$type m <- max(edge) + nNode <- Nnode(tree) nTip <- Ntip(tree) f <- init_fitch(data, FALSE, FALSE, m=2L) f$traverse(edge) tmp <- reorder(tree)$edge tmp <- tmp[tmp[,2]>Ntip(tree),] if(length(tmp)>0 && acctran==TRUE)f$acctran_traverse(tmp) - res <- vector("list", m) - att$names <- makeAncNodeLabel(tree, ...) - if(type=="DNA" && return != "prob"){ - indx <- c(1, 2, 6, 3, 7, 9, 12, 4, 8, 10, 13, 11, 14, 15, 16) - res[1:nTip] <- data[1:nTip] - for(i in (nTip+1):m) - res[[i]] <- indx[f$getAncAmb(i)[1:nr]] - attributes(res) <- att - return(res) - } - else { + res <- res_state <- vector("list", nNode) +# res <- vector("list", m) + att$names <- tree$node.label #makeAncNodeLabel(tree, ...) +# else { fun <- function(X) { rs <- rowSums(X) X / rs } contrast <- att$contrast - for(i in seq_len(nTip)) res[[i]] <- contrast[data[[i]], , drop=FALSE] - for(i in (nTip+1):m) res[[i]] <- f$getAnc(i)[1:nr, , drop=FALSE] +# for(i in seq_len(nTip)) res[[i]] <- contrast[data[[i]], , drop=FALSE] +# for(i in (nTip+1):m) res[[i]] <- f$getAnc(i)[1:nr, , drop=FALSE] + for(i in seq_len(nNode)) res[[i]] <- f$getAnc(i+nTip)[seq_len(nr), , drop=FALSE] res <- lapply(res, fun) attributes(res) <- att class(res) <- c("ancestral", "phyDat") +# } + if(type=="DNA"){ + indx <- c(1, 2, 6, 3, 7, 9, 12, 4, 8, 10, 13, 11, 14, 15, 16) + # res[1:nTip] <- data[1:nTip] + for(i in seq_len(nNode)) # (nTip+1):m) + res_state[[i]] <- indx[f$getAncAmb(i+nTip)[1:nr]] + attributes(res_state) <- att + # return(res) + } else{ + res_state <- highest_state(res) + class(res_state) <- "phyDat" } - if(return != "prob"){ - res <- as.phyDat.ancestral(res) - class(res) <- "phyDat" - } - res + list(res, res_state) } makeAncNodeLabel <- function(tree, ...){ if(!is.null(tree$node.label)){ node_label <- tree$node.label - if(length(unique(node_label)) == Nnode(tree)) return(c(tree$tip.label, node_label)) + if(length(unique(node_label)) == Nnode(tree)) return(node_label) else message("Node labels are not unique, used makeNodeLabel(tree, ...) to create them!") } tree <- makeNodeLabel(tree, ...) - c(tree$tip.label, tree$node.label) + tree$node.label } diff --git a/R/plotAnc.R b/R/plotAnc.R index 73af60ff..c705eab6 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -10,6 +10,16 @@ getTransition <- function(scheme, levels){ } +getAncDF <- function(x){ + tmp <- x$data + contrast <- attr(tmp, "contrast") + attr(tmp, "contrast") <- contrast / rowSums(contrast) + tmp <- new2old.phyDat(tmp) + df <- list2df_ancestral(tmp, x$data) + rbind(df, x$prob) +} + + #' Plot ancestral character on a tree #' #' \code{plotAnc} plots a phylogeny and adds character to the nodes. Either @@ -19,10 +29,20 @@ getTransition <- function(scheme, levels){ #' #' For further details see vignette("Ancestral"). #' -#' @param tree a tree, i.e. an object of class pml -#' @param data an object of class \code{phyDat} or \code{ancestral}. -#' @param site.pattern logical, plot i-th site pattern or i-th site -#' @param i plots the i-th site of the \code{data}. +## @param tree a tree, i.e. an object of class pml or an object of class +## ancestral. +#' @param x an object of class \code{ancestral}. +## @param site.pattern logical, plot i-th site pattern or i-th site +#' @param i,site plots the i-th site. +#' @param which a subset of the numbers 1:3, by default 1:3, referring to +#' \enumerate{ +#' \item "tree with pie charts" plot +#' \item "seqlogo" plot +#' \item "image" plot +#' } +#' @param node to plot for which the propabilities should be plotted. +#' @param start start position to plot. +#' @param end end position to plot. #' @param col a vector containing the colors for all possible states. #' @param cex.pie a numeric defining the size of the pie graphs. #' @param pos a character string defining the position of the legend. @@ -31,7 +51,9 @@ getTransition <- function(scheme, levels){ #' nucleotides "Ape_NT" and"RY_NT". Names can be abbreviated. #' @param \dots Further arguments passed to or from other methods. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}} +#' @seealso \code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}}, +#' \code{\link[ape]{image.DNAbin}}, \code{\link[ape]{image.AAbin}} +#' \code{\link[ggseqlogo]{ggseqlogo}} #' @keywords plot #' @examples #' @@ -40,35 +62,63 @@ getTransition <- function(scheme, levels){ #' tree <- makeNodeLabel(tree) #' anc.p <- ancestral.pars(tree, Laurasiatherian) #' # plot the third character -#' plotAnc(tree, anc.p, 3) +#' ## plotAnc(tree, anc.p, 3) +#' plotAnc(anc.p, 3) +#' plotSeqLogo(anc.p, node="Node10", 1, 25) #' #' data(chloroplast) #' tree <- pratchet(chloroplast, maxit=10, trace=0) #' tree <- makeNodeLabel(tree) #' anc.ch <- ancestral.pars(tree, chloroplast) #' image(chloroplast[, 1:25]) -#' plotAnc(tree, anc.ch, 21, scheme="Ape_AA") -#' plotAnc(tree, anc.ch, 21, scheme="Clustal") +#' plotAnc(anc.ch, 21, scheme="Ape_AA") +#' plotAnc(anc.ch, 21, scheme="Clustal") +#' plotSeqLogo(anc.ch, node="Node1", 1, 25, scheme="Clustal") #' @importFrom grDevices hcl.colors +#' @importFrom ggseqlogo make_col_scheme ggseqlogo +#' @rdname plot.ancestral +#' @export +plot.ancestral <- function(x, which = c(1, 2, 3), site = 1, + node=NULL, col = NULL, cex.pie = .5, pos = "bottomright", + scheme=NULL, start=1, end=10, ...){ + stopifnot(inherits(x, "ancestral")) + if (!is.numeric(which) || any(which < 1) || any(which > 3)) + stop("'which' must be in 1:3") +# which <- match.arg(which, c("pie", "seqlogo", "image"), TRUE) + show <- rep(FALSE, 3) + show[which] <- TRUE + if(show[1L])plotAnc(x, i = site, col = col, cex.pie = cex.pie, pos = pos, + scheme=scheme, ...) + if(show[2L])plotSeqLogo(x, node, start=start, end=end, scheme=scheme, ...) + if(show[3L])image(x, scheme=scheme, ...) +} + + + +#' @rdname plot.ancestral #' @export -plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, +plotAnc <- function(x, i = 1, col = NULL, cex.pie = .5, pos = "bottomright", scheme=NULL, ...) { - stopifnot(inherits(data, "phyDat")) - y <- subset(data, select = i, site.pattern = site.pattern) - if(is.null(tree$node.label) || any(is.na(match(tree$node.label, names(y)))) || + stopifnot(inherits(x, "ancestral")) + df <- getAncDF(x) + data <- x$data + tree <- x$tree + Y <- subset(df, Site==i) + y <- as.matrix(Y[, -c(1:3)]) + # y <- y[, -c(1:3)] + colnames(y) <- gsub("p_", "", colnames(y)) + row.names(y) <- Y$Node + if(is.null(tree$node.label) || any(is.na(match(tree$node.label, rownames(y)))) || is.numeric(tree$node.label)) tree <- makeNodeLabel(tree) - if(any(is.na(match(c(tree$tip.label, tree$node.label), names(y))))) + + if(any(is.na(match(c(tree$tip.label, tree$node.label), rownames(y))))) stop("Tree needs nodelabel, which match the labels of the alignment!") - y <- y[c(tree$tip.label, tree$node.label),] CEX <- cex.pie xrad <- CEX * diff(par("usr")[1:2]) / 50 levels <- attr(data, "levels") nc <- attr(data, "nc") - if(inherits(data, "ancestral")){ - y <- matrix(unlist(y[]), ncol = nc, byrow = TRUE) - } else y <- attr(data, "contrast")[unlist(y),] if(!is.null(scheme)){ scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", "Transmembrane_tendency", "Ape_NT", "RY_NT")) @@ -90,9 +140,6 @@ plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, col <- sc$col nc <- ncol(y) } - # l <- dim(y)[1] - # dat <- matrix(0, l, nc) - # for (i in 1:l) dat[i, ] <- y[[i]] plot(tree, label.offset = 1.1 * xrad, plot = FALSE, ...) lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) XX <- lastPP$xx @@ -112,3 +159,46 @@ plotAnc <- function(tree, data, i = 1, site.pattern = FALSE, col = NULL, ) if (!is.null(pos)) legend(pos, legend=levels, pch=21, pt.bg = col) } + +#' @rdname plot.ancestral +#' @export +plotSeqLogo <- function(x, node, start=1, end=10, scheme="Ape_NT", ...){ + stopifnot(inherits(x, "ancestral")) + type <- attr(x$data, "type") + tree <- x$tree + df <- getAncDF(x) + nodes <- c(tree$tip.label, tree$node.label) + if(is.numeric(node)) node <- nodes[node] + X <- subset(df, subset=Node==node) + end <- min(end, nrow(X)) + X <- X[start:end, , drop=FALSE] + X <- t(as.matrix(X[, -c(1:3)])) + tmp <- gsub("p_", "", rownames(X)) + lev <- rownames(X) <- toupper(tmp) + if(!is.null(scheme)){ + scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", + "Transmembrane_tendency", "Ape_NT", "RY_NT")) + sc <- get(scheme, environment(ace)) + if(has_gap_state(x$data) && type=="AA"){ + sc$properties <- c(sc$properties, Gap="-") + sc$color <- c(sc$color, "#FFFFFF") + } + l <- lengths(sc$properties) + SC <- make_col_scheme(chars = toupper(unlist(sc$properties)), + groups = rep(names(sc$properties), l), + cols=rep(sc$color, l)) + + } + else SC <- make_col_scheme(chars=lev, cols= hcl.colors(length(lev))) + ggseqlogo(X, col_scheme=SC, method='p') +} + + +#' @rdname plot.ancestral +#' @export +image.ancestral <- function(x, ...){ + tmp <- c(x$"data", x$"state") + image(tmp, ...) +} + + diff --git a/inst/tinytest/test_ancestral.R b/inst/tinytest/test_ancestral.R index 3d54aeff..185c46d0 100644 --- a/inst/tinytest/test_ancestral.R +++ b/inst/tinytest/test_ancestral.R @@ -15,15 +15,15 @@ fit <- pml(tree, dna) # dna tests differs from other data types as it may returns ambiguous data # test ancestral generics # test.ml1 <- ancestral.pml(fit, type = "ml") -test_ml <- ancestral.pml(fit, type = "ml", return = "phyDat") -test_mpr <- ancestral.pars(tree, dna, "MPR", return = "phyDat") -test_acctran <- ancestral.pars(tree, dna, "ACCTRAN", return = "phyDat") +test_ml <- ancestral.pml(fit, type = "ml") +test_mpr <- ancestral.pars(tree, dna, "MPR") +test_acctran <- ancestral.pars(tree, dna, "ACCTRAN") -expect_equal(as.character(test_ml), as.character(test_acctran)) -expect_equal(as.character(test_ml), as.character(test_mpr)) +#expect_equal(as.character(test_ml), as.character(test_acctran)) +#expect_equal(as.character(test_ml), as.character(test_mpr)) -test_mpr_2 <- ancestral.pars(tree2, dna, "MPR", return = "phyDat") -test_acctran_2 <- ancestral.pars(tree2, dna, "ACCTRAN", return = "phyDat") +#test_mpr_2 <- ancestral.pars(tree2, dna, "MPR") +#test_acctran_2 <- ancestral.pars(tree2, dna, "ACCTRAN") -expect_equal(test_mpr_2[,1], test_acctran_2[,1], check.attributes = FALSE) +#expect_equal(test_mpr_2[,1], test_acctran_2[,1], check.attributes = FALSE) diff --git a/man/ancestral.pml.Rd b/man/ancestral.pml.Rd index a0c8b80d..2ffc2140 100644 --- a/man/ancestral.pml.Rd +++ b/man/ancestral.pml.Rd @@ -1,36 +1,25 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ancestral_pml.R +% Please edit documentation in R/ancestral.R \name{ancestral.pml} \alias{ancestral.pml} -\alias{as.phyDat.ancestral} -\alias{as.data.frame.ancestral} \alias{ancestral.pars} \alias{pace} \title{Ancestral character reconstruction.} \usage{ -ancestral.pml(object, type = "marginal", return = "prob", ...) - -\method{as.phyDat}{ancestral}(x, ...) - -\method{as.data.frame}{ancestral}(x, ...) +ancestral.pml(object, type = "marginal", ...) ancestral.pars(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), - cost = NULL, return = "prob", ...) + cost = NULL, ...) -pace(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, - return = "prob", ...) +pace(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, ...) } \arguments{ \item{object}{an object of class pml} \item{type}{method used to assign characters to internal nodes, see details.} -\item{return}{return a \code{phyDat} object or matrix of probabilities.} - \item{\dots}{Further arguments passed to or from other methods.} -\item{x}{an object of class ancestral.} - \item{tree}{a tree, i.e. an object of class pml} \item{data}{an object of class phyDat} @@ -38,26 +27,34 @@ pace(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, \item{cost}{A cost matrix for the transitions between two states.} } \value{ -An object of class ancestral containing theestimated character -states. -For \code{return="phyDat"} an object of class "phyDat", containing -the ancestral states of all nodes. For nucleotide data this can contain -ambiguous states. Apart from fitch parsimony the most likely states are -returned. +An object of class ancestral. This is a list containing the tree with +node labels, the original alignment as an \code{phyDat} object, a +\code{data.frame} containing the probabilities belonging to a state for all +(internal nodes) and the most likely state. } \description{ Marginal reconstruction of the ancestral character states. } \details{ The argument "type" defines the criterion to assign the internal nodes. For -\code{ancestral.pml} so far "ml" and (empirical) "bayes" and for +\code{ancestral.pml} so far "ml and marginal (empirical) "bayes" and for \code{ancestral.pars} "MPR" and "ACCTRAN" are possible. +The function return a list containing the tree with node labels, the original +alignment as an \code{phyDat} object, a data.frame containing the +probabilities belonging to a state for all (internal nodes) and the most +likely state. For parsimony and nucleotide data the most likely state might +be ambiguous. For ML this is very unlikely to be the case. + +If the input tree does not contain unique node labels the function +\code{ape::MakeNodeLabel} is used to create them. + With parsimony reconstruction one has to keep in mind that there will be often no unique solution. -The functions use node labels if these are present and unique. Otherwise the -function \code{ape::MakeNodeLabel} is used to create them. +The functions use the node labels of the provided tree (also if part of the +\code{pml} object) if these are unique. Otherwise the function +\code{ape::MakeNodeLabel} is used to create them. For further details see vignette("Ancestral"). } @@ -69,15 +66,13 @@ tree <- makeNodeLabel(tree) fit <- pml(tree, Laurasiatherian) anc.ml <- ancestral.pml(fit, type = "ml") anc.p <- ancestral.pars(tree, Laurasiatherian) -\dontrun{ -require(seqLogo) -seqLogo( t(subset(anc.ml, 48, 1:20)[[1]]), ic.scale=FALSE) -seqLogo( t(subset(anc.p, 48, 1:20)[[1]]), ic.scale=FALSE) -} -# plot the first site pattern -plotAnc(tree, anc.ml, 1) +# plot ancestral sequences at the root +plotSeqLogo( anc.ml, 48, 1, 20) +plotSeqLogo( anc.p, 48, 1, 20) +# plot the first character +plotAnc(anc.ml) # plot the third character -plotAnc(tree, anc.ml, attr(anc.ml, "index")[3]) +plotAnc(anc.ml, 3) } \references{ diff --git a/man/parsimony.Rd b/man/parsimony.Rd index 714d415e..33bc04e3 100644 --- a/man/parsimony.Rd +++ b/man/parsimony.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ancestral_pml.R, R/fitch64.R, R/parsimony.R, +% Please edit documentation in R/ancestral.R, R/fitch64.R, R/parsimony.R, % R/sankoff.R \name{acctran} \alias{acctran} diff --git a/man/plotAnc.Rd b/man/plot.ancestral.Rd similarity index 53% rename from man/plotAnc.Rd rename to man/plot.ancestral.Rd index d9269408..4b3ff20f 100644 --- a/man/plotAnc.Rd +++ b/man/plot.ancestral.Rd @@ -1,20 +1,34 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotAnc.R -\name{plotAnc} +\name{plot.ancestral} +\alias{plot.ancestral} \alias{plotAnc} +\alias{plotSeqLogo} +\alias{image.ancestral} \title{Plot ancestral character on a tree} \usage{ -plotAnc(tree, data, i = 1, site.pattern = FALSE, col = NULL, - cex.pie = 0.5, pos = "bottomright", scheme = NULL, ...) +\method{plot}{ancestral}(x, which = c(1, 2, 3), site = 1, node = NULL, + col = NULL, cex.pie = 0.5, pos = "bottomright", scheme = NULL, + start = 1, end = 10, ...) + +plotAnc(x, i = 1, col = NULL, cex.pie = 0.5, pos = "bottomright", + scheme = NULL, ...) + +plotSeqLogo(x, node, start = 1, end = 10, scheme = "Ape_NT", ...) + +\method{image}{ancestral}(x, ...) } \arguments{ -\item{tree}{a tree, i.e. an object of class pml} - -\item{data}{an object of class \code{phyDat} or \code{ancestral}.} +\item{x}{an object of class \code{ancestral}.} -\item{i}{plots the i-th site of the \code{data}.} +\item{which}{a subset of the numbers 1:3, by default 1:3, referring to +\enumerate{ +\item "tree with pie charts" plot +\item "seqlogo" plot +\item "image" plot +}} -\item{site.pattern}{logical, plot i-th site pattern or i-th site} +\item{node}{to plot for which the propabilities should be plotted.} \item{col}{a vector containing the colors for all possible states.} @@ -26,7 +40,13 @@ plotAnc(tree, data, i = 1, site.pattern = FALSE, col = NULL, "Zappo_AA", "Clustal", "Polarity" and "Transmembrane_tendency", for nucleotides "Ape_NT" and"RY_NT". Names can be abbreviated.} +\item{start}{start position to plot.} + +\item{end}{end position to plot.} + \item{\dots}{Further arguments passed to or from other methods.} + +\item{i, site}{plots the i-th site.} } \description{ \code{plotAnc} plots a phylogeny and adds character to the nodes. Either @@ -44,18 +64,23 @@ example(NJ) tree <- makeNodeLabel(tree) anc.p <- ancestral.pars(tree, Laurasiatherian) # plot the third character -plotAnc(tree, anc.p, 3) +## plotAnc(tree, anc.p, 3) +plotAnc(anc.p, 3) +plotSeqLogo(anc.p, node="Node10", 1, 25) data(chloroplast) tree <- pratchet(chloroplast, maxit=10, trace=0) tree <- makeNodeLabel(tree) anc.ch <- ancestral.pars(tree, chloroplast) image(chloroplast[, 1:25]) -plotAnc(tree, anc.ch, 21, scheme="Ape_AA") -plotAnc(tree, anc.ch, 21, scheme="Clustal") +plotAnc(anc.ch, 21, scheme="Ape_AA") +plotAnc(anc.ch, 21, scheme="Clustal") +plotSeqLogo(anc.ch, node="Node1", 1, 25, scheme="Clustal") } \seealso{ -\code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}} +\code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}}, +\code{\link[ape]{image.DNAbin}}, \code{\link[ape]{image.AAbin}} +\code{\link[ggseqlogo]{ggseqlogo}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/write.ancestral.Rd b/man/write.ancestral.Rd new file mode 100644 index 00000000..ea611255 --- /dev/null +++ b/man/write.ancestral.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ancestral.R +\name{write.ancestral} +\alias{write.ancestral} +\alias{ancestral} +\alias{print.ancestral} +\title{Export and convenience functions for ancestral reconstructions} +\usage{ +write.ancestral(x, file = "ancestral") + +ancestral(tree, align, prob) + +\method{print}{ancestral}(x, ...) +} +\arguments{ +\item{x}{an object of class ancestral.} + +\item{file}{a file name. File endings are added.} + +\item{tree}{an object of class phylo.} + +\item{align}{an object of class phyDat.} + +\item{prob}{an data.frame containing a matrix of posterior probabilities for +each state and site.} + +\item{...}{Further arguments passed to or from other methods.} +} +\description{ +\code{write.ancestral} allows to export ancestral reconstructions. It writes +out the tree, a tab delimited text file with the probabilities and the +alignment. \code{read.ancestral} tries to read in these files. +\code{ancestral} generates an object of call ancestral. +} +\details{ +This allows also to read in reconstruction made by iqtree to use the +plotting capabilities of R. +} diff --git a/vignettes/Ancestral.Rmd b/vignettes/Ancestral.Rmd index c965cff0..d1a65387 100644 --- a/vignettes/Ancestral.Rmd +++ b/vignettes/Ancestral.Rmd @@ -47,28 +47,23 @@ anc.mpr <- ancestral.pars(tree, primates, "MPR") All the ancestral reconstructions for parsimony are based on the fitch algorithm and so far only bifurcating trees are allowed. However trees can get pruned afterwards using the function `multi2di` from _ape_. -The `seqLogo` function from the _seqLogo_ package from Bioconductor provides a neat way to show proportions of a nucleotides of ancestral states (see figure 1). +The `plotSeqLogo` function is a wrapper around the from the _ggseqlogo_ function the the _ggseqlogo_ package [@ggseqlogo] and provides a simple way to show proportions of a nucleotides of ancestral states (see figure 1). -```{r seqLogo, fig.cap="Fig 1. Ancestral reconstruction for a node.", eval=FALSE} -library(seqLogo) -seqLogo( t(subset(anc.mpr, getRoot(tree), 1:20)[[1]]), ic.scale=FALSE) +```{r seqLogo, fig.cap="Fig 1. Ancestral reconstruction for a node.", eval=TRUE} +#library(seqLogo) +#seqLogo( t(subset(anc.mpr, getRoot(tree), 1:20)[[1]]), ic.scale=FALSE) +plotSeqLogo(anc.mpr, node=getRoot(tree), 1, 20) ``` ![](seqLogo.png) -You may need to install _seqLogo_ before -```{r, eval=FALSE} -if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager") -BiocManager::install("seqLogo") -``` ```{r MPR, fig.cap="Fig 2. Ancestral reconstruction using MPR."} -plotAnc(tree, anc.mpr, 17) +plotAnc(anc.mpr, 17) title("MPR") ``` ```{r ACCTRAN, fig.cap="Fig 3. Ancestral reconstruction using ACCTRAN."} -plotAnc(tree, anc.acctran, 17) +plotAnc(anc.acctran, 17) title("ACCTRAN") ``` @@ -98,11 +93,11 @@ anc.bayes <- ancestral.pml(fit, "bayes") ``` The differences of the two approaches for a specific site (17) are represented in the following figures. ```{r plotML, fig.cap="Fig 4. Ancestral reconstruction the using the maximum likelihood."} -plotAnc(tree, anc.ml, 17) +plotAnc(anc.ml, 17) title("ML") ``` ```{r plotB, fig.cap="Fig 5. Ancestral reconstruction using (empirical) Bayes."} -plotAnc(tree, anc.bayes, 17) +plotAnc(anc.bayes, 17) title("Bayes") ``` @@ -152,7 +147,7 @@ all.equal(fit_SYM$logLik, fit_ace$loglik+log(1/3)) ```{r SYM_reconstruction} anc_SYM <- ancestral.pml(fit_SYM, "ml") -plotAnc(bird.orders, anc_SYM) +plotAnc(anc_SYM) ``` More complicated models can be applied using defining the rate matrix as shown in the vignette _Markov models and transition rate matrices_. From 90f81665c465bdd755fa0d32fe7bdf1fabd58d23 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 17 Apr 2024 15:33:50 +0200 Subject: [PATCH 145/216] clean up --- R/networx.R | 6 +++--- R/pmlMix.R | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/R/networx.R b/R/networx.R index a147a1f6..a51817ea 100644 --- a/R/networx.R +++ b/R/networx.R @@ -43,9 +43,9 @@ allCircularSplits <- function(k, labels = NULL) { } -degree <- function(x){ - tabulate(x$edge) -} +#degree <- function(x){ +# tabulate(x$edge) +#} splits2design <- function(obj, weight = NULL, x=TRUE) { diff --git a/R/pmlMix.R b/R/pmlMix.R index eeae51e1..d701b9ff 100644 --- a/R/pmlMix.R +++ b/R/pmlMix.R @@ -531,7 +531,8 @@ pmlMix <- function(formula, fit, m = 2, omega = rep(1 / m, m), pl0 <- ll[, -i, drop = FALSE] %*% omega[-i] fits[[i]] <- update(fits[[i]], llMix = pl0, wMix = omega[i]) } - if(trace >0) cat("logLik optim.pml start:", sum(weight * log(ll %*% omega)), "\n") + if(trace >0) cat("logLik optim.pml start:", + sum(weight * log(ll %*% omega)), "\n") for (i in 1:r) { pl0 <- ll[, -i, drop = FALSE] %*% omega[-i] fits[[i]] <- optim.pml(fits[[i]], optNni = MixNni, optBf = MixBf, @@ -541,7 +542,8 @@ pmlMix <- function(formula, fit, m = 2, omega = rep(1 / m, m), trace - 1), llMix = pl0, wMix = omega[i]) ll[, i] <- fits[[i]]$lv - if(trace >0) cat("logLik optim.pml", i, ": ", sum(weight * log(ll %*% omega)), "\n") + if(trace >0) cat("logLik optim.pml", i, ": ", + sum(weight * log(ll %*% omega)), "\n") # res <- optW(ll, weight, omega) # omega <- res$p if (MixRate) { From 1a4e448031bc308cfe4d662847f4840179d8b449 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 17 Apr 2024 15:35:14 +0200 Subject: [PATCH 146/216] update --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 406c5ffe..5f3c32f5 100644 --- a/README.md +++ b/README.md @@ -15,11 +15,11 @@ You can install - [r-universe](https://r-universe.dev/) kindly provides binaries for Windows, Linux and OS X of the development version [here](https://klausvigo.r-universe.dev/phangorn#). -To install the development version you may need to install the Biostrings and seqLogo package from bioconductor first: +To install the development version you may need to install the Biostrings package from bioconductor first: ``` if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") -BiocManager::install(c("Biostrings", "seqLogo")) +BiocManager::install("Biostrings") ``` Also the development version usually depends on the latest ape development version and information to download can be found From 0fb3099f92a6ce23f6b214dcea3984ba06118733 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 17 Apr 2024 16:27:15 +0200 Subject: [PATCH 147/216] small improvements --- R/ancestral.R | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/R/ancestral.R b/R/ancestral.R index 8c51d478..8ca25824 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -208,18 +208,30 @@ ancestral <- function(tree, align, prob){ stopifnot(inherits(tree, "phylo")) stopifnot(inherits(align, "phyDat")) stopifnot(inherits(prob, "data.frame")) - erg <- list(tree=tree, data=align, prob=prob) + state <- extract_states(prob, attr(align, "type"), + levels=attr(align, "levels")) + erg <- list(tree=tree, data=align, prob=prob, state=state) class(erg) <- "ancestral" erg } +extract_states <- function(x, type, levels=NULL){ + node_label <- unique(x$"Node") + y <- matrix(x$"State", nrow=length(node_label), byrow=TRUE, + dimnames = list(node_label, NULL)) + if(type %in% c("DNA", "AA")) return( phyDat(y, type=type) ) + phyDat(y, type=type, levels=levels) +} + #' @rdname write.ancestral #' @export print.ancestral <- function(x, ...){ stopifnot(inherits(x, "ancestral")) print(x$tree) + cat("\n") print(x$data) + cat("\n") print(head(x$prob)) } From 93e7edf810a1e1ec7b711608d66eec285e35dfb9 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 17 Apr 2024 16:36:15 +0200 Subject: [PATCH 148/216] clean up, use definition for color schemes in ape --- R/zzz.R | 80 --------------------------------------------------------- 1 file changed, 80 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index 0c8e79e0..fc911cf3 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -42,86 +42,6 @@ .nucleotideAlphabet <- c("a", "c", "g", "t") -Ape_NT <- list(properties = list( - a="a", g="g", c="c", t="t", n="n", "-"="-"), - color=c("red", "yellow", "green", "blue", "grey", "black")) - - -RY_NT <- list(properties = list( - Purine = c("a", "g", "r"), - Pyrimidine = c("c", "t", "y"), - "n" = "n", - "-" = "-"), - color=c("#FF00FF", "#00FFFF", "grey", "black")) - -Ape_AA <- list(properties = list( - Hydrophobic = c("V", "I", "L", "F", "W", "Y", "M"), - Small = c("P", "G", "A", "C"), - Hydrophilic = c("S", "T", "H", "N", "Q", "D", "E", "K", "R")), - color=c("red", "yellow", "blue")) - -# Properties + Conservation (Clustal X) -Clustal <- list(properties = list( - Hydrophobic = c("A", "I", "L", "M", "F", "W", "V"), - Positive = c("K", "R"), - Negative = c("E", "D"), - Polar = c("N", "Q", "S", "T"), - Glycines = "G", - Prolines = "P", - Aromatic = c("H", "Y"), - Cysteine = "C"), - color= c("#80a0f0", "#f01505", "#c048c0", "#15c015", "#f09048", "#c0c000", - "#15a4a4", "#f08080") -) - - -Polarity <- list(properties = list( - "Non polar" = c("G", "A", "V", "L", "I", "F", "W", "M", "P"), - "Polar, uncharged" = c("S", "T", "C", "Y", "N", "Q"), - "Polar, acidic" = c("D", "E"), - "Polar, basic" = c("K", "R", "H")), - color = c("yellow", "green", "red", "blue")) - -# Physicochemical Properties -Zappo_AA <- list(properties = list( - "Aliphatic/Hydrophobic" = c("I", "L", "V", "A", "M"), - Aromatic = c("F", "W", "Y"), - Positive = c("K", "R", "H"), - Negative = c("E", "D"), - Hydrophilic = c("S", "T", "N", "Q"), - "Conformationally special" = c("P", "G"), - Cysteine = "C"), - color= c("#ff7979", "#f89f56", "#0070c0", "#c00000", "#08c81a", "#cc00cc", - "#ffff00") -) - - -Transmembrane_tendency <- list(properties = list( - Lys = "K", - Asp = "D", - Glu = "E", - Arg = "R", - Gln = "Q", - Asn = "N", - Pro = "P", - His = "H", - Ser = "S", - Thr = "T", - Cys = "C", - Gly = "G", - Ala = "A", - Tyr = "Y", - Met = "M", - Val = "V", - Trp = "W", - Leu = "L", - Ile = "I", - Phe = "F" -), color=c("#0000FF", "#0D00F1", "#1A00E4", "#2800D6", "#3500C9", "#4300BB", - "#5000AE", "#5D00A1", "#6B0093", "#780086", "#860078", "#93006B", - "#A1005D", "#AE0050", "#BB0043", "#C90035", "#D60028", "#E4001A", - "#F1000D", "#FF0000")) - # if rate g[i] is smaller than .gEps invariant site is increased by w[i] .gEps <- 1e-12 From c1442bca29bda4a48a3b97d5ed639e265d06bc67 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 17 Apr 2024 17:32:46 +0200 Subject: [PATCH 149/216] prevent warnings --- src/dupAtomMat.cpp | 24 +++++++++++----------- src/lessAndEqual.h | 50 +++++++++++++++++++++++----------------------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/dupAtomMat.cpp b/src/dupAtomMat.cpp index 29390aa6..92f08b75 100644 --- a/src/dupAtomMat.cpp +++ b/src/dupAtomMat.cpp @@ -3,7 +3,7 @@ vecMap intVecMap; vecMap doubleVecMap; -vecMap charsexpVecMap; +vecMap charsexpVecMap; vecMap rawVecMap; // Rbyte is an alias of unsigned char extern "C" { @@ -13,9 +13,9 @@ SEXP grpDupAtomMat(SEXP x, SEXP MARGIN, SEXP fromLast) SEXP out; int* dim; int nGrps; - dim=INTEGER(getAttrib(x, R_DimSymbol)); - out = PROTECT(allocVector(INTSXP, dim[*INTEGER(MARGIN)-1])); - + dim=INTEGER(Rf_getAttrib(x, R_DimSymbol)); + out = PROTECT(Rf_allocVector(INTSXP, dim[*INTEGER(MARGIN)-1])); + switch (TYPEOF(x)) { case REALSXP: nGrps = doubleVecMap.grpDuplicatedMat (REAL(x), dim, dim+1, INTEGER(out), *INTEGER(MARGIN)==1, (bool)(*(LOGICAL(fromLast))) ); @@ -23,7 +23,7 @@ SEXP grpDupAtomMat(SEXP x, SEXP MARGIN, SEXP fromLast) case INTSXP: // factor type is also covered here // if(!inherits(x, "factor")) nGrps = intVecMap.grpDuplicatedMat (INTEGER(x), dim, dim+1, INTEGER(out), *INTEGER(MARGIN)==1, (bool)(*(LOGICAL(fromLast))) ); - // else {;} + // else {;} break; case LGLSXP: nGrps = intVecMap.grpDuplicatedMat (LOGICAL(x), dim, dim+1, INTEGER(out), *INTEGER(MARGIN)==1, (bool)(*(LOGICAL(fromLast))) ); @@ -32,23 +32,23 @@ SEXP grpDupAtomMat(SEXP x, SEXP MARGIN, SEXP fromLast) CharSEXP* charSexpPtr = new CharSEXP [ dim[0]*dim[1] ]; for(int i=dim[0]*dim[1]-1; i>=0; --i) charSexpPtr[i].sexp = STRING_ELT(x, i); - + nGrps = charsexpVecMap.grpDuplicatedMat (charSexpPtr, dim, dim+1, INTEGER(out), *INTEGER(MARGIN)==1, (bool)(*(LOGICAL(fromLast))) ); - + delete[] charSexpPtr; break; } case RAWSXP: nGrps = rawVecMap.grpDuplicatedMat (RAW(x), dim, dim+1, INTEGER(out), *INTEGER(MARGIN)==1, (bool)(*(LOGICAL(fromLast))) ); break; - default: - error("C function 'grpDupAtomMat' only accepts REALSXP, LGLSXP, INTSXP and STRSXP"); +// default: +// error("C function 'grpDupAtomMat' only accepts REALSXP, LGLSXP, INTSXP and STRSXP"); } - + SEXP nLevels; - nLevels = PROTECT(allocVector(INTSXP, 1)); + nLevels = PROTECT(Rf_allocVector(INTSXP, 1)); INTEGER(nLevels)[0] = nGrps; - setAttrib(out, install("nlevels"), nLevels); + Rf_setAttrib(out, Rf_install("nlevels"), nLevels); UNPROTECT(2); return out; } diff --git a/src/lessAndEqual.h b/src/lessAndEqual.h index 24ecec75..9ca42fe5 100644 --- a/src/lessAndEqual.h +++ b/src/lessAndEqual.h @@ -8,9 +8,9 @@ #include -/* - NOTE: R_NaString is a different SEXP than mkChar("NA"), but holding the same string "NA". - We will treat R_NaString to be smaller than every usual string, including mkChar("NA"). +/* + NOTE: R_NaString is a different SEXP than mkChar("NA"), but holding the same string "NA". + We will treat R_NaString to be smaller than every usual string, including mkChar("NA"). Real NaN becomes mkChar("NaN") by as.character(); Real -Inf becomes mkChar("-Inf") by as.character(); Real Inf becomes mkChar("Inf") by as.character(); @@ -20,37 +20,37 @@ class CharSEXP{ public: SEXP sexp; inline bool valid() {return( TYPEOF(sexp) == CHARSXP );} - + CharSEXP(SEXP x) { if (TYPEOF(x) == CHARSXP) sexp = x; - else error("CharSEXP should be initialized with a CHARSXP type object"); +// else error("CharSEXP should be initialized with a CHARSXP type object"); } - + CharSEXP() { - sexp = R_NaString; + sexp = R_NaString; } - - friend inline bool operator< (const CharSEXP& lhs, const CharSEXP& rhs) + + friend inline bool operator< (const CharSEXP& lhs, const CharSEXP& rhs) { if (lhs.sexp == R_NaString) return( rhs.sexp != R_NaString ); if (rhs.sexp == R_NaString) return(false); - return( + return( strcmp(const_cast(CHAR(lhs.sexp)), const_cast(CHAR(rhs.sexp)) )<0 - ); + ); } - - friend inline bool operator== (const CharSEXP& lhs, const CharSEXP& rhs) - { + + friend inline bool operator== (const CharSEXP& lhs, const CharSEXP& rhs) + { return (lhs.sexp == rhs.sexp); // R CHARSXP objects are cached (only one copy per string) } - + }; -/* for general T where operator< and operator== have been implemented; - * this is helpful for +/* for general T where operator< and operator== have been implemented; + * this is helpful for * (1) integers where NA's is just a special integer value; * (2) unsigned char, where NA's is converted to 00; * (3) CharSEXP, where NA's are properly handled by operator<. @@ -63,25 +63,25 @@ template /* double Assumptions: NaN < NA_real_ < -Inf < Finite numbers < Inf */ template <> -inline bool lessThan(const double& lhs, const double& rhs) +inline bool lessThan(const double& lhs, const double& rhs) { if (R_FINITE(lhs) && R_FINITE(rhs)) return lhs< rhs; // probably the most common case (both finite) - - bool rhsTest = R_IsNaN(rhs); // rhs = NaN + + bool rhsTest = R_IsNaN(rhs); // rhs = NaN if (R_IsNaN(lhs)) return !rhsTest; // lhs = NaN - + rhsTest = rhsTest || ISNA(rhs); // rhs <= NA_real_ if (ISNA(lhs)) return !rhsTest; // lhs = NA - + rhsTest = rhsTest || (rhs == R_NegInf); // rhs <= -Inf if (lhs == R_NegInf) return !rhsTest; // lhs = -Inf - - if(rhsTest) return false; // lhs is finite or +Inf but rhs <= -Inf + + if(rhsTest) return false; // lhs is finite or +Inf but rhs <= -Inf return R_FINITE(lhs); // lhs is finite or +Inf but rhs is +Inf } template <> -inline bool equalTo (const double& lhs, const double& rhs) +inline bool equalTo (const double& lhs, const double& rhs) {return( (lhs == rhs) || (ISNA(lhs) && ISNA(rhs)) || From 796fed5bf9d6a726232b5d541feb6be23746827a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 17 Apr 2024 17:56:56 +0200 Subject: [PATCH 150/216] small improvement --- R/ancestral.R | 2 +- man/write.ancestral.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ancestral.R b/R/ancestral.R index 8ca25824..214e5cea 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -179,7 +179,7 @@ ancestral.pml <- function(object, type = "marginal", ...) { #' \code{write.ancestral} allows to export ancestral reconstructions. It writes #' out the tree, a tab delimited text file with the probabilities and the #' alignment. \code{read.ancestral} tries to read in these files. -#' \code{ancestral} generates an object of call ancestral. +#' \code{ancestral} generates an object of class ancestral. #' #' This allows also to read in reconstruction made by iqtree to use the #' plotting capabilities of R. diff --git a/man/write.ancestral.Rd b/man/write.ancestral.Rd index ea611255..6afc4650 100644 --- a/man/write.ancestral.Rd +++ b/man/write.ancestral.Rd @@ -30,7 +30,7 @@ each state and site.} \code{write.ancestral} allows to export ancestral reconstructions. It writes out the tree, a tab delimited text file with the probabilities and the alignment. \code{read.ancestral} tries to read in these files. -\code{ancestral} generates an object of call ancestral. +\code{ancestral} generates an object of class ancestral. } \details{ This allows also to read in reconstruction made by iqtree to use the From a929e35489a579e88cd977aa2f97e6aaaa2ed606 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 17 Apr 2024 18:19:05 +0200 Subject: [PATCH 151/216] scale tree --- inst/tinytest/test_modelTest.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/inst/tinytest/test_modelTest.R b/inst/tinytest/test_modelTest.R index d14a6380..ca6b8ef6 100644 --- a/inst/tinytest/test_modelTest.R +++ b/inst/tinytest/test_modelTest.R @@ -38,9 +38,12 @@ expect_equal(MT_AA_all$Model[which.min(MT_AA_all$BIC)], "WAG") # test user defined states tree <- rcoal(10) +tree$edge.length <- 2 * (tree$edge.length / sum(tree$edge.length)) +tree$edge.length[tree$edge[,2]<11] <- tree$edge.length[tree$edge[,2]<11] + .1 + Z <- simSeq(tree, Q = c(1,0,0,1,0,1), type = "USER", levels=c("A", "B", "C", "D")) -MT_USER <- modelTest(Z, I=TRUE, G=TRUE, +MT_USER <- modelTest(Z, tree=tree, I=TRUE, G=TRUE, control = pml.control(epsilon = 1e-08, maxit = 5, trace = 0), multicore = TRUE, mc.cores = 2L) expect_equal(MT_USER$Model[which.min(MT_USER$BIC)], "ORDERED") From 71e93073d11141772746da338af676ac76e3bd6e Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 18 Apr 2024 12:17:39 +0200 Subject: [PATCH 152/216] add links, try black gaps in plots --- R/ancestral.R | 1 + R/image_phyDat.R | 3 ++- R/plotAnc.R | 4 ++-- man/image.phyDat.Rd | 3 ++- man/write.ancestral.Rd | 3 +++ 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/R/ancestral.R b/R/ancestral.R index 214e5cea..fdd57fb7 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -186,6 +186,7 @@ ancestral.pml <- function(object, type = "marginal", ...) { #' @param x an object of class ancestral. #' @param file a file name. File endings are added. #' @param ... Further arguments passed to or from other methods. +#' @seealso \code{\link{ancestral.pml}}, \code{\link{plot.ancestral}} #' @rdname write.ancestral #' @export write.ancestral <- function(x, file="ancestral"){ diff --git a/R/image_phyDat.R b/R/image_phyDat.R index c7d52758..d99475db 100644 --- a/R/image_phyDat.R +++ b/R/image_phyDat.R @@ -5,7 +5,8 @@ #' A wrapper for using \code{\link{image.DNAbin}} and \code{\link{image.AAbin}}. #' @param x an object containing sequences, an object of class \code{phyDat}. #' @param ... further arguments passed to or from other methods. -#' @seealso \code{\link{image.DNAbin}}, \code{\link{image.AAbin}} +#' @seealso \code{\link{image.DNAbin}}, \code{\link{image.AAbin}}, +#' \code{\link{image.ancestral}} #' @method image phyDat #' @export image.phyDat <- function(x, ...){ diff --git a/R/plotAnc.R b/R/plotAnc.R index c705eab6..f556fbb9 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -125,7 +125,7 @@ plotAnc <- function(x, i = 1, col = NULL, sc <- get(scheme, environment(ace)) if(has_gap_state(data) && attr(data, "type")=="AA"){ sc$properties <- c(sc$properties, Gap="-") - sc$color <- c(sc$color, "#FFFFFF") + sc$color <- c(sc$color, "#000000") } if(attr(data, "type")=="DNA"){ ind <- match("n", names(sc$properties)) @@ -181,7 +181,7 @@ plotSeqLogo <- function(x, node, start=1, end=10, scheme="Ape_NT", ...){ sc <- get(scheme, environment(ace)) if(has_gap_state(x$data) && type=="AA"){ sc$properties <- c(sc$properties, Gap="-") - sc$color <- c(sc$color, "#FFFFFF") + sc$color <- c(sc$color, "#000000") } l <- lengths(sc$properties) SC <- make_col_scheme(chars = toupper(unlist(sc$properties)), diff --git a/man/image.phyDat.Rd b/man/image.phyDat.Rd index 8917463c..e5c208a8 100644 --- a/man/image.phyDat.Rd +++ b/man/image.phyDat.Rd @@ -18,5 +18,6 @@ This function plots an image of an alignment of sequences. A wrapper for using \code{\link{image.DNAbin}} and \code{\link{image.AAbin}}. } \seealso{ -\code{\link{image.DNAbin}}, \code{\link{image.AAbin}} +\code{\link{image.DNAbin}}, \code{\link{image.AAbin}}, +\code{\link{image.ancestral}} } diff --git a/man/write.ancestral.Rd b/man/write.ancestral.Rd index 6afc4650..b474be9f 100644 --- a/man/write.ancestral.Rd +++ b/man/write.ancestral.Rd @@ -36,3 +36,6 @@ alignment. \code{read.ancestral} tries to read in these files. This allows also to read in reconstruction made by iqtree to use the plotting capabilities of R. } +\seealso{ +\code{\link{ancestral.pml}}, \code{\link{plot.ancestral}} +} From 19264996d69c1ef053f89b12901c51e080870c1a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 18 Apr 2024 12:43:53 +0200 Subject: [PATCH 153/216] nicer messages, encapsulate code --- R/phylo.R | 130 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 94 insertions(+), 36 deletions(-) diff --git a/R/phylo.R b/R/phylo.R index 295f88c9..6c2f0ceb 100644 --- a/R/phylo.R +++ b/R/phylo.R @@ -588,7 +588,7 @@ optimEdge <- function(tree, data, eig = eig, w = w, g = g, bf = bf, rate = rate, eps <- (old.ll - newll) / newll if (eps < 0) return(list(tree=oldtree, logLik=old.ll)) oldtree <- treeP - if (control$trace > 1) cat(old.ll, " -> ", newll, "\n") +# if (control$trace > 1) cat(old.ll, " -> ", newll, "\n") old.ll <- newll } if (control$trace > 0) @@ -1971,8 +1971,8 @@ rooted.nni <- function(tree, data, eig, w, g, bf, rate, ll.0, INV, RELL=NULL, # if(aLRT) return(support_quartet) ll2 <- pml.fit4(tree, data, bf=bf, eig=eig, ll.0=ll.0, w=w, g=g, ...) eps <- (ll - ll2) / ll2 - if (control$trace > 1) cat(ll, " -> ", ll2, "\n") - if (control$trace > 1) cat("swap:", nchanges) + if (control$trace > 1) cat("optimize topology: ", ll, "-->", ll2, + " NNI moves: ", nchanges, "\n") ll <- ll2 iter <- iter + 1 } @@ -2009,6 +2009,70 @@ updateRates <- function(res, ll, rate, shape, k, inv, wMix, update="rate", } + +# help functions inside optim.pml +.ratchet_fun <- function(tree, data, ...){ + weight <- attr(data, "weight") + v <- rep(seq_along(weight), weight) + attr(data, "weight") <- tabulate(sample(v, replace = TRUE), + length(weight)) + res <- opt_Edge(tree, data, ...) + ll2 <- res[[2]] + opt_nni(tree, data, iter_max=5, trace=0, ll=ll2, ...)$tree +} + + +.di2multi2di <- function(tree2, tau, method){ + # tree2 <- di2multi(tree, tol = 10 * tau, tip2root = TRUE) # raus + if (!is.binary(tree2)) { + tree2 <- multi2di(tree2) + if(method=="unrooted" & is.rooted(tree2)) tree2 <- unroot(tree2) + tree2 <- minEdge(tree2, 3*tau, method=="ultrametric") + } + attr(tree2, "order") <- NULL + tree2 <- reorder(tree2, "postorder") + tree2 +} + + +.stochastic_fun <- function(tree, dm, tau, method, tip.dates, ratchet.par){ + tree <- di2multi(tree, tol = 10 * tau, tip2root = TRUE) + tree <- .di2multi2di(tree, tau, method) + tree <- reorder(tree, "postorder") + nTips <- Ntip(tree) + tree <- rNNI(tree, moves = round(nTips * ratchet.par$prop), n = 1) + if(method=="unrooted") return(tree) + if(method=="ultrametric"){ + tree <- nnls.tree(dm, tree) + tree <- midpoint(tree) + } + if(method=="tipdated") tree <- rtt(tree, tip.dates = tip.dates) + tree <- nnls.tree(dm, tree, method = method, tip.dates=tip.dates) + tree <- minEdge(tree, 5*tau, method=="ultrametric") + attr(tree, order) <- NULL + tree <- reorder(tree, "postorder") + tree +} + + +.updateQBF <- function(object, model, tmp){ + nc <- attr(object$data, "nc") + Q <- object$Q + bf <- object$bf + if(!tmp$optQ) Q <- rep(1, (nc*(nc-1L))/2) + if(model=="ORDERED") Q <- tmp$Q + if(!tmp$optBf){ + if(has_gap_state(object$data)){ + bf <- baseFreq(object$data) + bf[-nc] <- (1 - bf[nc]) / (nc-1) + } else bf <- rep(1 / nc, nc) + } + object <- update(object, Q=Q, bf=bf) + object +} + + + #' @rdname pml #' @aliases optim.pml #' @export @@ -2142,10 +2206,11 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, nr <- as.integer(attr(data, "nr")) nc <- as.integer(attr(data, "nc")) if (type == "DNA" & optModel) { + # .optBFQ tmp <- subsChoice(model, has_gap_state(data)) optQ <- tmp$optQ if (!optQ) { - Q <- rep(1, 6) + Q <- rep(1, (nc*(nc-1L))/2) object <- update.pml(object, Q = Q) } optBf <- tmp$optBf @@ -2383,14 +2448,14 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, res <- optimFreeRate(tree, data, g = g, k = k, w = w, inv = inv, INV = INV, bf = bf, eig = eig, ll.0 = ll.0, rate = rate) - scale <- function(tree, g, w){ - blub <- sum(g * w) - g <- g / blub - tree$edge.length <- tree$edge.length * blub - list(tree=tree, g=g) - } +# scale <- function(tree, g, w){ +# blub <- sum(g * w) +# g <- g / blub +# tree$edge.length <- tree$edge.length * blub +# list(tree=tree, g=g) +# } if(res[[2]] > ll){ - tmp_sc <- scale(tree, res[[1]], w) +# tmp_sc <- scale(tree, res[[1]], w) g0 <- res[[1]] blub <- sum(g0 * w) g <- g0 / blub @@ -2448,32 +2513,25 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, if((rearrangement == "stochastic" || rearrangement == "ratchet") && optRooted){ dm <- dist.ml(data, bf=bf, Q=Q, exclude = "pairwise") } - ratchet_fun <- function(tree, data, ...){ - weight <- attr(data, "weight") - v <- rep(seq_along(weight), weight) - attr(data, "weight") <- tabulate(sample(v, replace = TRUE), - length(weight)) - res <- opt_Edge(tree, data, ...) - ll2 <- res[[2]] - opt_nni(tree, data, iter_max=5, trace=0, ll=ll2, ...)$tree - } for(i in seq_len(maxit)){ if(rearrangement == "stochastic"){ - tree2 <- di2multi(tree, tol = 10 * tau, tip2root = TRUE) - if (!is.binary(tree2)) { - tree2 <- multi2di(tree2) - if(!optRooted) tree2 <- unroot(tree2) - tree2 <- minEdge(tree2, tau) - tree2 <- reorder(tree2, "postorder") - } - tree2 <- rNNI(tree2, moves = round(nTips * ratchet.par$prop), n = 1) - if(optRooted){ - tree2 <- nnls.tree(dm, tree2, method = method, - tip.dates=tip.dates) - tree2 <- minEdge(tree2, 10*tau) - } + tree2 <- .stochastic_fun(tree, dm, tau, method, tip.dates, + ratchet.par) + +# tree2 <- di2multi(tree, tol = 10 * tau, tip2root = TRUE) +# if (!is.binary(tree2)) { +# tree2 <- multi2di(tree2) +# if(!optRooted) tree2 <- unroot(tree2) +# tree2 <- minEdge(tree2, tau) +# tree2 <- reorder(tree2, "postorder") +# } +# tree2 <- rNNI(tree2, moves = round(nTips * ratchet.par$prop), n = 1) +# if(optRooted){ +# tree2 <- nnls.tree(dm, tree2, method = method, tip.dates=tip.dates) +# tree2 <- minEdge(tree2, 10*tau) +# } } else if(rearrangement == "ratchet"){ - tree2 <- ratchet_fun(tree, data, rooted=optRooted, w = w, g = g, + tree2 <- .ratchet_fun(tree, data, rooted=optRooted, w = w, g = g, eig = eig, bf = bf, inv=inv, rate=rate, ll.0 = ll.0, INV = INV, llMix = llMix, wMix=wMix, ASC=ASC, @@ -2486,7 +2544,7 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, # tree2 <- reorder(tree2, "postorder") } else if(rearrangement == "multi2di"){ tree2 <- di2multi(tree, tol=10*tau, tip2root=TRUE) - if(any(degree(tree2)>4)){ + if(any(tabulate(tree2$edge)>3)){ tree2 <- multi2di(tree2) if(!optRooted) tree2 <- unroot(tree2) tree2 <- minEdge(tree2, tau) @@ -2667,7 +2725,7 @@ optimQuartet <- function(tree, data, eig, w, g, bf, rate, ll.0, nTips, # if (control$trace > 1) cat(old.ll, " -> ", newll, "\n") old.ll <- newll } - if (control$trace > 0) cat(start.ll, " -> ", newll, "\n") +# if (control$trace > 0) cat(start.ll, " -> ", newll, "\n") list(tree = tree, logLik = newll, c(eps, iter)) } From ffbab19452d1c63e0066452083c17387749118e6 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 18 Apr 2024 13:47:17 +0200 Subject: [PATCH 154/216] add rechceck workflow --- .github/workflows/recheck.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .github/workflows/recheck.yml diff --git a/.github/workflows/recheck.yml b/.github/workflows/recheck.yml new file mode 100644 index 00000000..323d2ad3 --- /dev/null +++ b/.github/workflows/recheck.yml @@ -0,0 +1,18 @@ +on: + workflow_dispatch: + inputs: + which: + type: choice + description: Which dependents to check + options: + - strong + - most + +name: Reverse dependency check + +jobs: + revdep_check: + name: Reverse check ${{ inputs.which }} dependents + uses: r-devel/recheck/.github/workflows/recheck.yml@v1 + with: + which: ${{ inputs.which }} From ed09a47668531384d869f5c0fbc5a28b82ad64c4 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 19 Apr 2024 10:42:42 +0200 Subject: [PATCH 155/216] scale edges if necessary --- R/phylo.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/phylo.R b/R/phylo.R index 6c2f0ceb..ce7dd58a 100644 --- a/R/phylo.R +++ b/R/phylo.R @@ -2142,6 +2142,12 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, } } } + if(optRooted == FALSE && optEdge == TRUE){ + # rescale tree if edges are likely too long + pscore <- fitch(tree, data) / sum(attr(data, "weight")) + if(sum(tree$edge.length) > 3*pscore) + tree$edge.length <- tree$edge.length * 3*pscore / sum(tree$edge.length) + } if (optNni) { if(!timetree){ mapping <- map_duplicates(data) From a7f6ca403e0e8bc68c0ff015c0561e98fe2ea2d2 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 19 Apr 2024 14:58:05 +0200 Subject: [PATCH 156/216] more consistent coloring --- R/plotAnc.R | 52 +++++++++++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/R/plotAnc.R b/R/plotAnc.R index f556fbb9..6bc54fbd 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -34,13 +34,8 @@ getAncDF <- function(x){ #' @param x an object of class \code{ancestral}. ## @param site.pattern logical, plot i-th site pattern or i-th site #' @param i,site plots the i-th site. -#' @param which a subset of the numbers 1:3, by default 1:3, referring to -#' \enumerate{ -#' \item "tree with pie charts" plot -#' \item "seqlogo" plot -#' \item "image" plot -#' } -#' @param node to plot for which the propabilities should be plotted. +#' @param which either "pie" or "seqlogo" +#' @param node to plot for which the probabilities should be plotted. #' @param start start position to plot. #' @param end end position to plot. #' @param col a vector containing the colors for all possible states. @@ -62,35 +57,29 @@ getAncDF <- function(x){ #' tree <- makeNodeLabel(tree) #' anc.p <- ancestral.pars(tree, Laurasiatherian) #' # plot the third character -#' ## plotAnc(tree, anc.p, 3) -#' plotAnc(anc.p, 3) +#' plotAnc(anc.p, 3, pos="topright") #' plotSeqLogo(anc.p, node="Node10", 1, 25) #' #' data(chloroplast) #' tree <- pratchet(chloroplast, maxit=10, trace=0) #' tree <- makeNodeLabel(tree) #' anc.ch <- ancestral.pars(tree, chloroplast) -#' image(chloroplast[, 1:25]) -#' plotAnc(anc.ch, 21, scheme="Ape_AA") -#' plotAnc(anc.ch, 21, scheme="Clustal") +#' image(as.phyDat(anc.ch)[, 1:25]) +#' plotAnc(anc.ch, 21, scheme="Ape_AA", pos="topleft") +#' plotAnc(anc.ch, 21, scheme="Clustal", pos="topleft") #' plotSeqLogo(anc.ch, node="Node1", 1, 25, scheme="Clustal") #' @importFrom grDevices hcl.colors #' @importFrom ggseqlogo make_col_scheme ggseqlogo #' @rdname plot.ancestral #' @export -plot.ancestral <- function(x, which = c(1, 2, 3), site = 1, - node=NULL, col = NULL, cex.pie = .5, pos = "bottomright", +plot.ancestral <- function(x, which = c("pie", "seqlogo"), site = 1, + node=getRoot(x$tree), col = NULL, cex.pie = .5, pos = "bottomright", scheme=NULL, start=1, end=10, ...){ stopifnot(inherits(x, "ancestral")) - if (!is.numeric(which) || any(which < 1) || any(which > 3)) - stop("'which' must be in 1:3") -# which <- match.arg(which, c("pie", "seqlogo", "image"), TRUE) - show <- rep(FALSE, 3) - show[which] <- TRUE - if(show[1L])plotAnc(x, i = site, col = col, cex.pie = cex.pie, pos = pos, + which <- match.arg(which, c("pie", "seqlogo"), TRUE) + if(which=="pie")plotAnc(x, i = site, col = col, cex.pie = cex.pie, pos = pos, scheme=scheme, ...) - if(show[2L])plotSeqLogo(x, node, start=start, end=end, scheme=scheme, ...) - if(show[3L])image(x, scheme=scheme, ...) + if(which=="seqlogo")plotSeqLogo(x, node, start=start, end=end, scheme=scheme, ...) } @@ -103,6 +92,7 @@ plotAnc <- function(x, i = 1, col = NULL, stopifnot(inherits(x, "ancestral")) df <- getAncDF(x) data <- x$data + type <- attr(data, "type") tree <- x$tree Y <- subset(df, Site==i) y <- as.matrix(Y[, -c(1:3)]) @@ -119,6 +109,8 @@ plotAnc <- function(x, i = 1, col = NULL, xrad <- CEX * diff(par("usr")[1:2]) / 50 levels <- attr(data, "levels") nc <- attr(data, "nc") + if(is.null(scheme) & type=="AA") scheme="Ape_AA" + if(is.null(scheme) & type=="DNA") scheme="Ape_NT" if(!is.null(scheme)){ scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", "Transmembrane_tendency", "Ape_NT", "RY_NT")) @@ -162,7 +154,7 @@ plotAnc <- function(x, i = 1, col = NULL, #' @rdname plot.ancestral #' @export -plotSeqLogo <- function(x, node, start=1, end=10, scheme="Ape_NT", ...){ +plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT", ...){ stopifnot(inherits(x, "ancestral")) type <- attr(x$data, "type") tree <- x$tree @@ -175,6 +167,8 @@ plotSeqLogo <- function(x, node, start=1, end=10, scheme="Ape_NT", ...){ X <- t(as.matrix(X[, -c(1:3)])) tmp <- gsub("p_", "", rownames(X)) lev <- rownames(X) <- toupper(tmp) + if(is.null(scheme) & type=="AA") scheme="Ape_AA" + if(is.null(scheme) & type=="DNA") scheme="Ape_NT" if(!is.null(scheme)){ scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", "Transmembrane_tendency", "Ape_NT", "RY_NT")) @@ -194,11 +188,11 @@ plotSeqLogo <- function(x, node, start=1, end=10, scheme="Ape_NT", ...){ } -#' @rdname plot.ancestral -#' @export -image.ancestral <- function(x, ...){ - tmp <- c(x$"data", x$"state") - image(tmp, ...) -} +##' @rdname plot.ancestral +##' @export +#image.ancestral <- function(x, ...){ +# tmp <- rbind(x$"data", x$"state") +# image(tmp, ...) +#} From c1a80a5bdff80bd32abfe7bd15d0120c6892741c Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 19 Apr 2024 14:58:05 +0200 Subject: [PATCH 157/216] simpler workflow --- R/ancestral.R | 11 ++++++--- R/plotAnc.R | 52 +++++++++++++++++++------------------------ man/ancestral.pml.Rd | 3 +++ man/plot.ancestral.Rd | 30 +++++++++---------------- 4 files changed, 45 insertions(+), 51 deletions(-) diff --git a/R/ancestral.R b/R/ancestral.R index fdd57fb7..e1a2e472 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -236,9 +236,14 @@ print.ancestral <- function(x, ...){ print(head(x$prob)) } -## @rdname ancestral.pml -## @export -#as.phyDat.ancestral <- function(x, ...) { +#' @rdname ancestral.pml +#' @export +as.phyDat.ancestral <- function(x, ...) { + rbind(x$data, x$state) +} + + + highest_state <- function(x, ...) { type <- attr(x, "type") fun2 <- function(x) { diff --git a/R/plotAnc.R b/R/plotAnc.R index f556fbb9..6bc54fbd 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -34,13 +34,8 @@ getAncDF <- function(x){ #' @param x an object of class \code{ancestral}. ## @param site.pattern logical, plot i-th site pattern or i-th site #' @param i,site plots the i-th site. -#' @param which a subset of the numbers 1:3, by default 1:3, referring to -#' \enumerate{ -#' \item "tree with pie charts" plot -#' \item "seqlogo" plot -#' \item "image" plot -#' } -#' @param node to plot for which the propabilities should be plotted. +#' @param which either "pie" or "seqlogo" +#' @param node to plot for which the probabilities should be plotted. #' @param start start position to plot. #' @param end end position to plot. #' @param col a vector containing the colors for all possible states. @@ -62,35 +57,29 @@ getAncDF <- function(x){ #' tree <- makeNodeLabel(tree) #' anc.p <- ancestral.pars(tree, Laurasiatherian) #' # plot the third character -#' ## plotAnc(tree, anc.p, 3) -#' plotAnc(anc.p, 3) +#' plotAnc(anc.p, 3, pos="topright") #' plotSeqLogo(anc.p, node="Node10", 1, 25) #' #' data(chloroplast) #' tree <- pratchet(chloroplast, maxit=10, trace=0) #' tree <- makeNodeLabel(tree) #' anc.ch <- ancestral.pars(tree, chloroplast) -#' image(chloroplast[, 1:25]) -#' plotAnc(anc.ch, 21, scheme="Ape_AA") -#' plotAnc(anc.ch, 21, scheme="Clustal") +#' image(as.phyDat(anc.ch)[, 1:25]) +#' plotAnc(anc.ch, 21, scheme="Ape_AA", pos="topleft") +#' plotAnc(anc.ch, 21, scheme="Clustal", pos="topleft") #' plotSeqLogo(anc.ch, node="Node1", 1, 25, scheme="Clustal") #' @importFrom grDevices hcl.colors #' @importFrom ggseqlogo make_col_scheme ggseqlogo #' @rdname plot.ancestral #' @export -plot.ancestral <- function(x, which = c(1, 2, 3), site = 1, - node=NULL, col = NULL, cex.pie = .5, pos = "bottomright", +plot.ancestral <- function(x, which = c("pie", "seqlogo"), site = 1, + node=getRoot(x$tree), col = NULL, cex.pie = .5, pos = "bottomright", scheme=NULL, start=1, end=10, ...){ stopifnot(inherits(x, "ancestral")) - if (!is.numeric(which) || any(which < 1) || any(which > 3)) - stop("'which' must be in 1:3") -# which <- match.arg(which, c("pie", "seqlogo", "image"), TRUE) - show <- rep(FALSE, 3) - show[which] <- TRUE - if(show[1L])plotAnc(x, i = site, col = col, cex.pie = cex.pie, pos = pos, + which <- match.arg(which, c("pie", "seqlogo"), TRUE) + if(which=="pie")plotAnc(x, i = site, col = col, cex.pie = cex.pie, pos = pos, scheme=scheme, ...) - if(show[2L])plotSeqLogo(x, node, start=start, end=end, scheme=scheme, ...) - if(show[3L])image(x, scheme=scheme, ...) + if(which=="seqlogo")plotSeqLogo(x, node, start=start, end=end, scheme=scheme, ...) } @@ -103,6 +92,7 @@ plotAnc <- function(x, i = 1, col = NULL, stopifnot(inherits(x, "ancestral")) df <- getAncDF(x) data <- x$data + type <- attr(data, "type") tree <- x$tree Y <- subset(df, Site==i) y <- as.matrix(Y[, -c(1:3)]) @@ -119,6 +109,8 @@ plotAnc <- function(x, i = 1, col = NULL, xrad <- CEX * diff(par("usr")[1:2]) / 50 levels <- attr(data, "levels") nc <- attr(data, "nc") + if(is.null(scheme) & type=="AA") scheme="Ape_AA" + if(is.null(scheme) & type=="DNA") scheme="Ape_NT" if(!is.null(scheme)){ scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", "Transmembrane_tendency", "Ape_NT", "RY_NT")) @@ -162,7 +154,7 @@ plotAnc <- function(x, i = 1, col = NULL, #' @rdname plot.ancestral #' @export -plotSeqLogo <- function(x, node, start=1, end=10, scheme="Ape_NT", ...){ +plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT", ...){ stopifnot(inherits(x, "ancestral")) type <- attr(x$data, "type") tree <- x$tree @@ -175,6 +167,8 @@ plotSeqLogo <- function(x, node, start=1, end=10, scheme="Ape_NT", ...){ X <- t(as.matrix(X[, -c(1:3)])) tmp <- gsub("p_", "", rownames(X)) lev <- rownames(X) <- toupper(tmp) + if(is.null(scheme) & type=="AA") scheme="Ape_AA" + if(is.null(scheme) & type=="DNA") scheme="Ape_NT" if(!is.null(scheme)){ scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", "Transmembrane_tendency", "Ape_NT", "RY_NT")) @@ -194,11 +188,11 @@ plotSeqLogo <- function(x, node, start=1, end=10, scheme="Ape_NT", ...){ } -#' @rdname plot.ancestral -#' @export -image.ancestral <- function(x, ...){ - tmp <- c(x$"data", x$"state") - image(tmp, ...) -} +##' @rdname plot.ancestral +##' @export +#image.ancestral <- function(x, ...){ +# tmp <- rbind(x$"data", x$"state") +# image(tmp, ...) +#} diff --git a/man/ancestral.pml.Rd b/man/ancestral.pml.Rd index 2ffc2140..e57d226f 100644 --- a/man/ancestral.pml.Rd +++ b/man/ancestral.pml.Rd @@ -2,12 +2,15 @@ % Please edit documentation in R/ancestral.R \name{ancestral.pml} \alias{ancestral.pml} +\alias{as.phyDat.ancestral} \alias{ancestral.pars} \alias{pace} \title{Ancestral character reconstruction.} \usage{ ancestral.pml(object, type = "marginal", ...) +\method{as.phyDat}{ancestral}(x, ...) + ancestral.pars(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, ...) diff --git a/man/plot.ancestral.Rd b/man/plot.ancestral.Rd index 4b3ff20f..4b6d6943 100644 --- a/man/plot.ancestral.Rd +++ b/man/plot.ancestral.Rd @@ -4,31 +4,24 @@ \alias{plot.ancestral} \alias{plotAnc} \alias{plotSeqLogo} -\alias{image.ancestral} \title{Plot ancestral character on a tree} \usage{ -\method{plot}{ancestral}(x, which = c(1, 2, 3), site = 1, node = NULL, - col = NULL, cex.pie = 0.5, pos = "bottomright", scheme = NULL, - start = 1, end = 10, ...) +\method{plot}{ancestral}(x, which = c("pie", "seqlogo"), site = 1, + node = getRoot(x$tree), col = NULL, cex.pie = 0.5, + pos = "bottomright", scheme = NULL, start = 1, end = 10, ...) plotAnc(x, i = 1, col = NULL, cex.pie = 0.5, pos = "bottomright", scheme = NULL, ...) -plotSeqLogo(x, node, start = 1, end = 10, scheme = "Ape_NT", ...) - -\method{image}{ancestral}(x, ...) +plotSeqLogo(x, node = getRoot(x$tree), start = 1, end = 10, + scheme = "Ape_NT", ...) } \arguments{ \item{x}{an object of class \code{ancestral}.} -\item{which}{a subset of the numbers 1:3, by default 1:3, referring to -\enumerate{ -\item "tree with pie charts" plot -\item "seqlogo" plot -\item "image" plot -}} +\item{which}{either "pie" or "seqlogo"} -\item{node}{to plot for which the propabilities should be plotted.} +\item{node}{to plot for which the probabilities should be plotted.} \item{col}{a vector containing the colors for all possible states.} @@ -64,17 +57,16 @@ example(NJ) tree <- makeNodeLabel(tree) anc.p <- ancestral.pars(tree, Laurasiatherian) # plot the third character -## plotAnc(tree, anc.p, 3) -plotAnc(anc.p, 3) +plotAnc(anc.p, 3, pos="topright") plotSeqLogo(anc.p, node="Node10", 1, 25) data(chloroplast) tree <- pratchet(chloroplast, maxit=10, trace=0) tree <- makeNodeLabel(tree) anc.ch <- ancestral.pars(tree, chloroplast) -image(chloroplast[, 1:25]) -plotAnc(anc.ch, 21, scheme="Ape_AA") -plotAnc(anc.ch, 21, scheme="Clustal") +image(as.phyDat(anc.ch)[, 1:25]) +plotAnc(anc.ch, 21, scheme="Ape_AA", pos="topleft") +plotAnc(anc.ch, 21, scheme="Clustal", pos="topleft") plotSeqLogo(anc.ch, node="Node1", 1, 25, scheme="Clustal") } \seealso{ From 29eb64fbbca83d7af591f42e00eb848d29e9c748 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 23 Apr 2024 11:11:24 +0200 Subject: [PATCH 158/216] add supoort codons --- R/image_phyDat.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/image_phyDat.R b/R/image_phyDat.R index d99475db..179d61d5 100644 --- a/R/image_phyDat.R +++ b/R/image_phyDat.R @@ -5,13 +5,13 @@ #' A wrapper for using \code{\link{image.DNAbin}} and \code{\link{image.AAbin}}. #' @param x an object containing sequences, an object of class \code{phyDat}. #' @param ... further arguments passed to or from other methods. -#' @seealso \code{\link{image.DNAbin}}, \code{\link{image.AAbin}}, -#' \code{\link{image.ancestral}} +#' @seealso \code{\link{image.DNAbin}}, \code{\link{image.AAbin}} #' @method image phyDat #' @export image.phyDat <- function(x, ...){ x <- gap_as_ambiguous(x) + if(attr(x, "type") == "CODON") x <- codon2dna(x) if(attr(x, "type") == "AA") image(as.AAbin(x), ...) if(attr(x, "type") == "DNA") image(as.DNAbin(x), ...) - else return(NULL) + if(attr(x, "USER") ) return(NULL) } From c76a40d9b53b325ccb9619aa437cb346dd532aae Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 23 Apr 2024 11:16:12 +0200 Subject: [PATCH 159/216] add suport for codons --- man/image.phyDat.Rd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/man/image.phyDat.Rd b/man/image.phyDat.Rd index e5c208a8..8917463c 100644 --- a/man/image.phyDat.Rd +++ b/man/image.phyDat.Rd @@ -18,6 +18,5 @@ This function plots an image of an alignment of sequences. A wrapper for using \code{\link{image.DNAbin}} and \code{\link{image.AAbin}}. } \seealso{ -\code{\link{image.DNAbin}}, \code{\link{image.AAbin}}, -\code{\link{image.ancestral}} +\code{\link{image.DNAbin}}, \code{\link{image.AAbin}} } From d8c0e9f1c4eb6590989b282ab310e594b8909e64 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 23 Apr 2024 11:17:52 +0200 Subject: [PATCH 160/216] try avoiding nodes, add as.phyDat.ancestral --- R/ancestral.R | 1 + R/plotAnc.R | 17 +++++++++++++---- man/ancestral.pml.Rd | 2 ++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/R/ancestral.R b/R/ancestral.R index e1a2e472..578f493b 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -237,6 +237,7 @@ print.ancestral <- function(x, ...){ } #' @rdname ancestral.pml +#' @param x an object of class ancestral #' @export as.phyDat.ancestral <- function(x, ...) { rbind(x$data, x$state) diff --git a/R/plotAnc.R b/R/plotAnc.R index 6bc54fbd..eff59327 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -94,7 +94,9 @@ plotAnc <- function(x, i = 1, col = NULL, data <- x$data type <- attr(data, "type") tree <- x$tree - Y <- subset(df, Site==i) + subset <- df[,"Site"] == i + Y <- df[subset & !is.na(subset),] +# Y <- subset(df, Site==i) y <- as.matrix(Y[, -c(1:3)]) # y <- y[, -c(1:3)] colnames(y) <- gsub("p_", "", colnames(y)) @@ -153,6 +155,7 @@ plotAnc <- function(x, i = 1, col = NULL, } #' @rdname plot.ancestral +#' @importFrom ggplot2 scale_x_continuous #' @export plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT", ...){ stopifnot(inherits(x, "ancestral")) @@ -161,9 +164,11 @@ plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT df <- getAncDF(x) nodes <- c(tree$tip.label, tree$node.label) if(is.numeric(node)) node <- nodes[node] - X <- subset(df, subset=Node==node) + subset <- df[,"Node"] == node + X <- df[subset & !is.na(subset),] +# X2 <- subset(df, subset=Node==node) end <- min(end, nrow(X)) - X <- X[start:end, , drop=FALSE] +# X <- X[start:end, , drop=FALSE] X <- t(as.matrix(X[, -c(1:3)])) tmp <- gsub("p_", "", rownames(X)) lev <- rownames(X) <- toupper(tmp) @@ -184,10 +189,14 @@ plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT } else SC <- make_col_scheme(chars=lev, cols= hcl.colors(length(lev))) - ggseqlogo(X, col_scheme=SC, method='p') + ggseqlogo(X, col_scheme=SC, method='p') + + scale_x_continuous(limits = c(start-0.5, end+.5) , + breaks=pretty(seq(start, end))) } +# p = ggplot() + geom_logo(data = data, ...) + theme_logo() + ##' @rdname plot.ancestral ##' @export #image.ancestral <- function(x, ...){ diff --git a/man/ancestral.pml.Rd b/man/ancestral.pml.Rd index e57d226f..353c45ea 100644 --- a/man/ancestral.pml.Rd +++ b/man/ancestral.pml.Rd @@ -23,6 +23,8 @@ pace(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, ...) \item{\dots}{Further arguments passed to or from other methods.} +\item{x}{an object of class ancestral} + \item{tree}{a tree, i.e. an object of class pml} \item{data}{an object of class phyDat} From 8a56b703e654c01d04ba47d505075bb4d3dc99e4 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 23 Apr 2024 11:20:59 +0200 Subject: [PATCH 161/216] add ggplot2 as import --- DESCRIPTION | 3 ++- NAMESPACE | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 55c04cf9..4593a713 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: phangorn Title: Phylogenetic Reconstruction and Analysis -Version: 3.0.0.1 +Version: 3.0.0.0 Authors@R: c(person("Klaus", "Schliep", role = c("aut", "cre"), email = "klaus.schliep@gmail.com", @@ -45,6 +45,7 @@ Imports: fastmatch, generics, ggseqlogo, + ggplot2, graphics, grDevices, igraph (>= 1.0), diff --git a/NAMESPACE b/NAMESPACE index fd99ef07..6dc4220f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ S3method(as.phyDat,DNAStringSet) S3method(as.phyDat,DNAbin) S3method(as.phyDat,MultipleAlignment) S3method(as.phyDat,alignment) +S3method(as.phyDat,ancestral) S3method(as.phyDat,character) S3method(as.phyDat,data.frame) S3method(as.phyDat,factor) @@ -48,7 +49,6 @@ S3method(glance,pmlMix) S3method(hash,multiPhylo) S3method(hash,phylo) S3method(identify,networx) -S3method(image,ancestral) S3method(image,phyDat) S3method(logLik,pml) S3method(logLik,pmlMix) @@ -261,6 +261,7 @@ importFrom(Matrix,sparseMatrix) importFrom(fastmatch,fmatch) importFrom(generics,glance) importFrom(generics,tidy) +importFrom(ggplot2,scale_x_continuous) importFrom(ggseqlogo,ggseqlogo) importFrom(ggseqlogo,make_col_scheme) importFrom(grDevices,adjustcolor) From 646434746e5825c58dd897fa20669e3975d7bfb4 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 23 Apr 2024 11:27:09 +0200 Subject: [PATCH 162/216] updates --- NEWS | 61 +++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index 36c693cc..9416a4ee 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,45 @@ - CHANGES in PHANGORN VERSION 2.12.0 + CHANGES in PHANGORN VERSION 3.0.0 + +NEW FEATURES + + o new function supgma, performing serial sampled UPGMA + + (Drummond, Rodrigo 2000) + + o new function gap_as_state, treating gaps as a state. + + o new function keep_as_tip allows to prune a tree and keep nodes as a tip + + (suggested by Hedvig Skirgård) + + o nnls now can estimate trees with calibrations and tip dated trees + + o the functions Descendants, Ancestors, Siblings, mrca.phy now also accept a + + character vector for the node argument and not only integers. + + o Ancestral state reconstructionwas in parts rewritten. Output is now an + + object of class ancestral. E.g. ancestral states of constant sites are now + + always this state. + + o Improvements to several plot functions to get nicer plots out of the box. + + o Nicer defaults for plot.pml, mainly for rooted trees. + + o plotAnc got an argument scheme allowing to use different color schemes, + + especially for amino acid models shared with image.AAbin. + + o plot.networx gets two additional arguments, direction is in plot.phylo and + + angle to rotate the network. + + o new function composition_test comparing indicating possible problems + + with base / state composition. + OTHER CHANGES @@ -12,27 +53,17 @@ OTHER CHANGES which were implicitly using "estimated". - o pml_bb roots the tree, if method is either ultrametric or tipdated. Fixes - - problem when modelTest object was supplied. - o some improvements to pmlPart. - o nicer defaults for plot.pml, mainly for rooted trees. - - o plot.networx gets two additional arguments direction and angle. - o the sankoff algorithm has been rewritten. - o functions Descendants, Ancestors, Siblings, mrca.phy now also accept a - - character vector for the node argument. +BUG FIXES - o plotAnc got an argument scheme allowing to use different color schemes. + o pml_bb roots the tree, if method is either ultrametric or tipdated. Fixes - Some default values changed to produce nicer plots are nicer out of the + problem when modelTest object was supplied. - box. + o nnls.tree now checks if the tree has singletons. CHANGES in PHANGORN VERSION 2.11.0 From 0106dc50707ddfd517d08ac141028209ec935c5c Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 23 Apr 2024 12:20:06 +0200 Subject: [PATCH 163/216] bugfix --- R/image_phyDat.R | 3 ++- man/image.phyDat.Rd | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/image_phyDat.R b/R/image_phyDat.R index 179d61d5..577afa6d 100644 --- a/R/image_phyDat.R +++ b/R/image_phyDat.R @@ -3,6 +3,7 @@ #' This function plots an image of an alignment of sequences. #' #' A wrapper for using \code{\link{image.DNAbin}} and \code{\link{image.AAbin}}. +#' Codons triplets are handled as nucleotide sequences. #' @param x an object containing sequences, an object of class \code{phyDat}. #' @param ... further arguments passed to or from other methods. #' @seealso \code{\link{image.DNAbin}}, \code{\link{image.AAbin}} @@ -13,5 +14,5 @@ image.phyDat <- function(x, ...){ if(attr(x, "type") == "CODON") x <- codon2dna(x) if(attr(x, "type") == "AA") image(as.AAbin(x), ...) if(attr(x, "type") == "DNA") image(as.DNAbin(x), ...) - if(attr(x, "USER") ) return(NULL) + if(attr(x, "type") == "USER") return(NULL) } diff --git a/man/image.phyDat.Rd b/man/image.phyDat.Rd index 8917463c..1d9d5ed0 100644 --- a/man/image.phyDat.Rd +++ b/man/image.phyDat.Rd @@ -16,6 +16,7 @@ This function plots an image of an alignment of sequences. } \details{ A wrapper for using \code{\link{image.DNAbin}} and \code{\link{image.AAbin}}. +Codons triplets are handled as nucleotide sequences. } \seealso{ \code{\link{image.DNAbin}}, \code{\link{image.AAbin}} From f6932e3703a6d7473406274dcde24d5c5c014e6d Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Sun, 28 Apr 2024 14:50:12 +0200 Subject: [PATCH 164/216] add xlim and ylim, allow more control for the plots --- R/Densi.R | 84 ++++++++++++++++++++++++++++-------------------- man/densiTree.Rd | 7 +++- 2 files changed, 55 insertions(+), 36 deletions(-) diff --git a/R/Densi.R b/R/Densi.R index 3b84f318..8a7fa78f 100644 --- a/R/Densi.R +++ b/R/Densi.R @@ -12,12 +12,12 @@ getAges <- function(x) { add_tiplabels <- function(xy, tip.label, direction, adj, font, srt = 0, cex = 1, - col = 1, label.offset = 0) { + col = 1, label.offset = 0, maxBT=1) { direction <- match.arg(direction, c("rightwards", "leftwards", "upwards", "downwards")) horizontal <- direction %in% c("rightwards", "leftwards") nTips <- length(tip.label) - xx <- rep(1, nrow(xy)) + xx <- rep(maxBT, nrow(xy)) yy <- xy[, 2 ] if (direction == "leftwards" | direction == "downwards") xx <- xx * 0 if (!horizontal) { @@ -92,6 +92,8 @@ add_tiplabels <- function(xy, tip.label, direction, adj, font, srt = 0, cex = 1, #' @param jitter allows to shift trees. a list with two arguments: the amount of #' jitter and random or equally spaced (see details below) #' @param tip.dates A named vector of sampling times associated with the tips. +#' @param xlim the x limits of the plot. +#' @param ylim the y limits of the plot. #' @param \dots further arguments to be passed to plot. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{plot.phylo}}, \code{\link{plot.networx}}, @@ -125,12 +127,12 @@ add_tiplabels <- function(xy, tip.label, direction, adj, font, srt = 0, cex = 1, #' #' @export densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), - consensus = NULL, direction = "rightwards", optim = FALSE, - scaleX = FALSE, col = 1, width = 1, lty = 1, cex = .8, - font = 3, tip.color = 1, adj = 0, srt = 0, - underscore = FALSE, label.offset = 0, scale.bar = TRUE, - jitter = list(amount = 0, random = TRUE), tip.dates=NULL, - ...) { + consensus = NULL, direction = "rightwards", optim = FALSE, + scaleX = FALSE, col = 1, width = 1, lty = 1, cex = .8, + font = 3, tip.color = 1, adj = 0, srt = 0, + underscore = FALSE, label.offset = 0, scale.bar = TRUE, + jitter = list(amount = 0, random = TRUE), tip.dates=NULL, + xlim=NULL, ylim=NULL, ...) { if (!inherits(x, "multiPhylo")) stop("x must be of class multiPhylo") if (is.character(consensus)) { consensus <- stree(length(consensus), tip.label = consensus) @@ -160,8 +162,6 @@ densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), nTip <- as.integer(length(consensus$tip.label)) consensus <- sort_tips(consensus) consensus <- reorder(consensus, "postorder") - - at <- NULL maxBT <- max(getAges(x)) if(!is.null(tip.dates)){ @@ -169,57 +169,63 @@ densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), label <- pretty(c(root_time, max(tip.dates)), min.n = 3) label <- label[label < max(tip.dates)] maxBT <- max(maxBT, max(tip.dates) - min(label)) - at <- 1 - (max(tip.dates) - label) / maxBT - if(direction=="leftwards" || direction=="downwards") at <- at + 1- max(at) + at <- maxBT - (max(tip.dates) - label) #/ maxBT + if(direction=="leftwards" || direction=="downwards") at <- at + maxBT - max(at) scaleX <- FALSE } else { if (scaleX) maxBT <- 1.0 label <- rev(pretty(c(maxBT, 0))) maxBT <- max(label, maxBT) - at <- seq(0, 1.0, length.out = length(label)) + at <- seq(0, maxBT, length.out = length(label)) } - xy <- plotPhyloCoor(consensus, direction = direction, ...) yy <- xy[, 2] - tl <- which.max(nchar(consensus$tip.label)) if(horizontal) pin1 <- par("pin")[1] else pin1 <- par("pin")[2] - sw <- strwidth(consensus$tip.label[tl], "inch", cex = cex) / pin1 * 1.1 - + sw <- strwidth(consensus$tip.label[tl], "inch", cex = cex) / pin1 * 1.1 * maxBT + if(is.null(xlim)){ + xlim <- switch(direction, + rightwards = c(0, maxBT + sw), + leftwards = c(0 - sw, maxBT), + downwards = c(0, nTip + 1), + upwards = c(0, nTip + 1)) + } + if(is.null(ylim)){ + ylim <- switch(direction, + rightwards = c(0, nTip + 1), + leftwards = c(0, nTip + 1), + downwards = c(0 - sw, maxBT), + upwards = c(0, maxBT + sw)) + } if (direction == "rightwards") { - plot.default(0, type = "n", xlim = c(0, 1.0 + sw), ylim = c(0, nTip + 1), + plot.default(0, type = "n", xlim = xlim, ylim = ylim, xlab = "", ylab = "", axes = FALSE, ...) - if (scale.bar) axis(side = 1, at = at, #seq(0, 1.0, length.out = length(label)), - labels = label) + if (scale.bar) axis(side = 1, at = at, labels = label, cex.axis=cex) } if (direction == "leftwards") { - plot.default(0, type = "n", xlim = c(0 - sw, 1.0), ylim = c(0, nTip + 1), + plot.default(0, type = "n", xlim = xlim, ylim = ylim, xlab = "", ylab = "", axes = FALSE, ...) - if (scale.bar) axis(side = 1, at = at, #seq(0, 1.0, length.out = length(label)), - labels = rev(label)) + if (scale.bar) axis(side = 1, at = at, labels = rev(label), cex.axis=cex) } if (direction == "downwards") { - plot.default(0, type = "n", xlim = c(0, nTip + 1), ylim = c(0 - sw, 1.0), + plot.default(0, type = "n", xlim = xlim, ylim = ylim, xlab = "", ylab = "", axes = FALSE, ...) - if (scale.bar) axis(side = 2, at = at, #seq(0, 1.0, length.out = length(label)), - labels = rev(label)) + if (scale.bar) axis(side = 2, at = at, labels = rev(label), cex.axis=cex) } if (direction == "upwards") { - plot.default(0, type = "n", xlim = c(0, nTip + 1), ylim = c(0, 1.0 + sw), + plot.default(0, type = "n", xlim = xlim, ylim = ylim, xlab = "", ylab = "", axes = FALSE, ...) - if (scale.bar) axis(side = 2, at = at, #seq(0, 1.0, length.out = length(label)), - labels = label) + if (scale.bar) axis(side = 2, at = at, labels = label, cex.axis=cex) } tip_labels <- consensus$tip.label if (is.expression(consensus$tip.label)) underscore <- TRUE if (!underscore) tip_labels <- gsub("_", " ", tip_labels) - add_tiplabels(xy, tip_labels, direction, adj = adj, font = font, srt = srt, - cex = cex, col = tip.color, label.offset = label.offset) + cex = cex, col = tip.color, label.offset = label.offset, maxBT = maxBT) col <- rep(col, length.out = length(x)) tiporder <- 1:nTip @@ -241,14 +247,14 @@ densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), if (horizontal) { if (scaleX) xx <- xx / max(xx) - else xx <- xx / maxBT - if (direction == "rightwards") xx <- xx + (1.0 - max(xx)) + else xx <- xx #/ maxBT + if (direction == "rightwards") xx <- xx + (maxBT - max(xx)) if (jitter$amount > 0) yy <- yy + jit[treeindex] } else { if (scaleX) yy <- yy / max(yy) - else yy <- yy / maxBT - if (direction == "upwards") yy <- yy + (1.0 - max(yy)) + #else yy <- yy + if (direction == "upwards") yy <- yy + (maxBT - max(yy)) if (jitter$amount > 0) xx <- xx + jit[treeindex] } e1 <- tmp$edge[, 1] @@ -263,4 +269,12 @@ densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), edge.lty = lty) } } + L <- list(type = type, font = font, cex = cex, + adj = adj, srt = srt, #no.margin = no.margin, + label.offset = label.offset, + x.lim = xlim, y.lim = ylim, direction = direction, + tip.color = tip.color, Ntip = nTip #, Nnode = Nnode, + #root.time = x$root.time, align.tip.label = align.tip.label + ) + assign("last_plot.phylo", L, envir = .PlotPhyloEnv) } diff --git a/man/densiTree.Rd b/man/densiTree.Rd index ddf281c3..7dd4a55a 100644 --- a/man/densiTree.Rd +++ b/man/densiTree.Rd @@ -8,7 +8,8 @@ densiTree(x, type = "cladogram", alpha = 1/length(x), consensus = NULL, direction = "rightwards", optim = FALSE, scaleX = FALSE, col = 1, width = 1, lty = 1, cex = 0.8, font = 3, tip.color = 1, adj = 0, srt = 0, underscore = FALSE, label.offset = 0, scale.bar = TRUE, - jitter = list(amount = 0, random = TRUE), tip.dates = NULL, ...) + jitter = list(amount = 0, random = TRUE), tip.dates = NULL, + xlim = NULL, ylim = NULL, ...) } \arguments{ \item{x}{an object of class \code{multiPhylo}.} @@ -63,6 +64,10 @@ jitter and random or equally spaced (see details below)} \item{tip.dates}{A named vector of sampling times associated with the tips.} +\item{xlim}{the x limits of the plot.} + +\item{ylim}{the y limits of the plot.} + \item{\dots}{further arguments to be passed to plot.} } \description{ From 212254de981d16514b4489ad54644fcf7e4882d0 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Sun, 28 Apr 2024 14:52:18 +0200 Subject: [PATCH 165/216] improve documentation, add examples and return values to make pkgcheck happy --- R/image_phyDat.R | 3 +++ R/parsimony.R | 2 +- R/plot_pml.R | 11 ++++++++--- R/transferBootstrap.R | 2 ++ R/treedist.R | 3 +++ man/CI.Rd | 2 +- man/cophenetic.networx.Rd | 4 ++++ man/image.phyDat.Rd | 4 ++++ man/plot.pml.Rd | 4 ++++ man/transferBootstrap.Rd | 4 ++++ 10 files changed, 34 insertions(+), 5 deletions(-) diff --git a/R/image_phyDat.R b/R/image_phyDat.R index 577afa6d..d636fe81 100644 --- a/R/image_phyDat.R +++ b/R/image_phyDat.R @@ -7,6 +7,9 @@ #' @param x an object containing sequences, an object of class \code{phyDat}. #' @param ... further arguments passed to or from other methods. #' @seealso \code{\link{image.DNAbin}}, \code{\link{image.AAbin}} +#' @examples +#' data("chloroplast") +#' image(chloroplast[, 1:50], scheme="Clustal", show.aa = TRUE) #' @method image phyDat #' @export image.phyDat <- function(x, ...){ diff --git a/R/parsimony.R b/R/parsimony.R index a8b6c109..af1ab421 100644 --- a/R/parsimony.R +++ b/R/parsimony.R @@ -193,7 +193,7 @@ upperBound <- function(x, cost = NULL) { #' \deqn{RI = \frac{MaxChanges - ObsChanges}{MaxChanges - MinChanges}}{RI = (MaxChanges - ObsChanges) / (MaxChanges - MinChanges)} #' #' @param data A object of class phyDat containing sequences. -#' @param tree tree to start the nni search from. +#' @param tree a phylogenetic tree, i.e. an object of class \code{phylo}. #' @param cost A cost matrix for the transitions between two states. #' @param sitewise return CI/RI for alignment or sitewise #' diff --git a/R/plot_pml.R b/R/plot_pml.R index c1200d6e..0c29fcf0 100644 --- a/R/plot_pml.R +++ b/R/plot_pml.R @@ -11,6 +11,8 @@ #' Four values are possible: "rightwards" (the default), "leftwards", "upwards", #' and "downwards". #' @param \dots further parameters to be passed to \code{plot.phylo}. +#' @return \code{plot.pml} returns invisibly a list with arguments dexcribing the plot. +#' For further details see the \code{plot.phylo}. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{plot.phylo}}, \code{\link{axisPhylo}}, #' \code{\link{add.scale.bar}} @@ -41,8 +43,9 @@ plot.pml <- function(x, type="phylogram", direction = "rightwards", ...){ tree <- x$tree extras <- match.call(expand.dots = FALSE)$... cex <- ifelse(is.null(extras$cex), par("cex"), extras$cex) + cex.axis <- ifelse(is.null(extras$cex.axis), cex, extras$cex.axis) if(!is.rooted(tree) && (type != "unrooted") ) tree <- midpoint(tree) - plot.phylo(tree, type=type, direction=direction, ...) + L <- plot.phylo(tree, type=type, direction=direction, ...) if(is.rooted(tree) && (type %in% c("phylogram","cladogram"))){ direction <- match.arg(direction, c("rightwards", "leftwards", "upwards", "downwards")) @@ -53,11 +56,13 @@ plot.pml <- function(x, type="phylogram", direction = "rightwards", ...){ "downwards" = 2) if(!is.null(x$tip.dates) && x$method=="tipdated"){ root_time <- max(x$tip.dates) - max(node.depth.edgelength(x$tree)) - axisPhylo(side, root.time = root_time, backward = FALSE) + axisPhylo(side, root.time = root_time, backward = FALSE, + cex.axis=cex.axis) } - else if(x$method=="ultrametric") axisPhylo(side) + else if(x$method=="ultrametric") axisPhylo(side, cex.axis=cex.axis) else add.scale.bar(cex=cex) } else add.scale.bar(cex=cex) if(!is.null(x$bs)) add_support(tree, x$bs, cex=cex) + invisible(L) } diff --git a/R/transferBootstrap.R b/R/transferBootstrap.R index b6d0620f..e01533bb 100644 --- a/R/transferBootstrap.R +++ b/R/transferBootstrap.R @@ -9,6 +9,8 @@ #' @param phylo Logical, return a phylogentic tree with support value or a #' vector of bootstrap values. #' @param scale scale the values. +#' @return a phylogentic tree (a phylo object) with bootstrap values assigned to +#' the node labels. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{plotBS}}, \code{\link{maxCladeCred}}, #' \code{\link{drawSupportOnEdges}} diff --git a/R/treedist.R b/R/treedist.R index 5b4d2e60..0adc5e58 100644 --- a/R/treedist.R +++ b/R/treedist.R @@ -49,6 +49,9 @@ cophenetic.splits <- function(x) { #' @seealso \code{\link[stats]{cophenetic}} for the generic function, #' \code{neighborNet} to construct a network from a distance matrix #' @keywords manip +#' @examples +#' example(neighborNet) +#' cophenetic(nnet) #' @export cophenetic.networx <- function(x) { spl <- x$splits diff --git a/man/CI.Rd b/man/CI.Rd index e41e9689..13fb0b3d 100644 --- a/man/CI.Rd +++ b/man/CI.Rd @@ -10,7 +10,7 @@ CI(tree, data, cost = NULL, sitewise = FALSE) RI(tree, data, cost = NULL, sitewise = FALSE) } \arguments{ -\item{tree}{tree to start the nni search from.} +\item{tree}{a phylogenetic tree, i.e. an object of class \code{phylo}.} \item{data}{A object of class phyDat containing sequences.} diff --git a/man/cophenetic.networx.Rd b/man/cophenetic.networx.Rd index 1adc540c..91da6119 100644 --- a/man/cophenetic.networx.Rd +++ b/man/cophenetic.networx.Rd @@ -18,6 +18,10 @@ labels (as given by the element \code{tip.label} of the argument \code{x}). \code{cophenetic.networx} computes the pairwise distances between the pairs of tips from a phylogenetic network using its branch lengths. } +\examples{ +example(neighborNet) +cophenetic(nnet) +} \seealso{ \code{\link[stats]{cophenetic}} for the generic function, \code{neighborNet} to construct a network from a distance matrix diff --git a/man/image.phyDat.Rd b/man/image.phyDat.Rd index 1d9d5ed0..73adb0bf 100644 --- a/man/image.phyDat.Rd +++ b/man/image.phyDat.Rd @@ -18,6 +18,10 @@ This function plots an image of an alignment of sequences. A wrapper for using \code{\link{image.DNAbin}} and \code{\link{image.AAbin}}. Codons triplets are handled as nucleotide sequences. } +\examples{ +data("chloroplast") +image(chloroplast[, 1:50], scheme="Clustal", show.aa = TRUE) +} \seealso{ \code{\link{image.DNAbin}}, \code{\link{image.AAbin}} } diff --git a/man/plot.pml.Rd b/man/plot.pml.Rd index 77971584..35a212e5 100644 --- a/man/plot.pml.Rd +++ b/man/plot.pml.Rd @@ -19,6 +19,10 @@ and "downwards".} \item{\dots}{further parameters to be passed to \code{plot.phylo}.} } +\value{ +\code{plot.pml} returns invisibly a list with arguments dexcribing the plot. +For further details see the \code{plot.phylo}. +} \description{ \code{plot.pml} is a wrapper around \code{plot.phylo} with different default values for unrooted, ultrametric and tip dated phylogenies. diff --git a/man/transferBootstrap.Rd b/man/transferBootstrap.Rd index 32eefe94..fb714273 100644 --- a/man/transferBootstrap.Rd +++ b/man/transferBootstrap.Rd @@ -16,6 +16,10 @@ vector of bootstrap values.} \item{scale}{scale the values.} } +\value{ +a phylogentic tree (a phylo object) with bootstrap values assigned to +the node labels. +} \description{ \code{transferBootstrap} assigns transfer bootstrap (Lemoine et al. 2018) values to the (internal) edges. From 4b3186cc1ae4368e2976cf6e79a64a03a4e017f1 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Sun, 28 Apr 2024 15:48:36 +0200 Subject: [PATCH 166/216] update, add return --- R/read.nexus.partitions.R | 8 +++++--- man/read.nexus.partitions.Rd | 9 +++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/R/read.nexus.partitions.R b/R/read.nexus.partitions.R index 6539ff53..b23ada8d 100644 --- a/R/read.nexus.partitions.R +++ b/R/read.nexus.partitions.R @@ -39,12 +39,14 @@ read.nexus.charset <- function(file){ #' Function to import partitioned data from nexus files #' #' \code{read.nexus.partitions} reads in sequences in NEXUS format and splits -#' the data according to the charsets givb in the SETS block. +#' the data according to the charsets given in the SETS block. #' #' @param file a file name. -#' @param return either returns a list where each element is a 'phyDat' object. -## or an object of class 'multiphyDat' +#' @param return either returns a list where each element is a 'phyDat' object +#' or an object of class 'multiphyDat' #' @param \dots Further arguments passed to or from other methods. +#' @return a list where each element is a 'phyDat' object or an object of class +#' 'multiphyDat'. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{read.nexus.data}}, \code{\link{read.phyDat}} #' @keywords cluster diff --git a/man/read.nexus.partitions.Rd b/man/read.nexus.partitions.Rd index 3891e38a..468746c5 100644 --- a/man/read.nexus.partitions.Rd +++ b/man/read.nexus.partitions.Rd @@ -9,13 +9,18 @@ read.nexus.partitions(file, return = "list", ...) \arguments{ \item{file}{a file name.} -\item{return}{either returns a list where each element is a 'phyDat' object.} +\item{return}{either returns a list where each element is a 'phyDat' object +or an object of class 'multiphyDat'} \item{\dots}{Further arguments passed to or from other methods.} } +\value{ +a list where each element is a 'phyDat' object or an object of class +'multiphyDat'. +} \description{ \code{read.nexus.partitions} reads in sequences in NEXUS format and splits -the data according to the charsets givb in the SETS block. +the data according to the charsets given in the SETS block. } \examples{ tree <- rtree(10) From bd60ac023e3beb6f907d03869e5edf03a25da4df Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Sun, 28 Apr 2024 15:56:37 +0200 Subject: [PATCH 167/216] don't export read.aa (use read.phyDat instead) --- R/phyDat.R | 2 +- man/read.aa.Rd | 53 -------------------------------------------------- 2 files changed, 1 insertion(+), 54 deletions(-) delete mode 100644 man/read.aa.Rd diff --git a/R/phyDat.R b/R/phyDat.R index da154fe5..a85a316b 100644 --- a/R/phyDat.R +++ b/R/phyDat.R @@ -512,7 +512,7 @@ constSitePattern <- function(n, names=NULL, type="DNA", levels=NULL){ #' Department of Genetics, University of Washington. #' \url{https://phylipweb.github.io/phylip/} #' @keywords IO -#' @export read.aa +#' @noRd read.aa <- function (file, format = "interleaved", skip = 0, nlines = 0, comment.char = "#", seq.names = NULL){ getTaxaNames <- function(x) { diff --git a/man/read.aa.Rd b/man/read.aa.Rd deleted file mode 100644 index 2e391bde..00000000 --- a/man/read.aa.Rd +++ /dev/null @@ -1,53 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/phyDat.R -\name{read.aa} -\alias{read.aa} -\title{Read Amino Acid Sequences in a File} -\usage{ -read.aa(file, format = "interleaved", skip = 0, nlines = 0, - comment.char = "#", seq.names = NULL) -} -\arguments{ -\item{file}{a file name specified by either a variable of mode character, or -a double-quoted string.} - -\item{format}{a character string specifying the format of the DNA sequences. -Three choices are possible: \code{"interleaved"}, \code{"sequential"}, or -\code{"fasta"}, or any unambiguous abbreviation of these.} - -\item{skip}{the number of lines of the input file to skip before beginning -to read data.} - -\item{nlines}{the number of lines to be read (by default the file is read -until its end).} - -\item{comment.char}{a single character, the remaining of the line after this -character is ignored.} - -\item{seq.names}{the names to give to each sequence; by default the names -read in the file are used.} -} -\value{ -a matrix of amino acid sequences. -} -\description{ -This function reads amino acid sequences in a file, and returns a matrix -list of DNA sequences with the names of the taxa read in the file as row -names. -} -\references{ -% Anonymous. FASTA format description. % -\url{https://en.wikipedia.org/wiki/FASTA_format} - -Felsenstein, J. (1993) Phylip (Phylogeny Inference Package) version 3.5c. -Department of Genetics, University of Washington. -\url{https://phylipweb.github.io/phylip/} -} -\seealso{ -\code{\link[ape]{read.dna}}, \code{\link[ape]{read.GenBank}}, -\code{\link[phangorn]{phyDat}}, \code{\link[seqinr]{read.alignment}} -} -\author{ -Klaus Schliep \email{klaus.schliep@gmail.com} -} -\keyword{IO} From 90bc96e0ad2fb8d199c0b9a5bcc44c4463c772d8 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Sun, 28 Apr 2024 18:45:26 +0200 Subject: [PATCH 168/216] improve man page --- R/parsimony.R | 21 ++++++++++++++++++++- R/phyDat.R | 2 +- R/phyDat_conversion.R | 2 +- R/read.phyDat.R | 2 +- man/CI.Rd | 24 ++++++++++++++++++++++++ man/read.phyDat.Rd | 2 +- 6 files changed, 48 insertions(+), 5 deletions(-) diff --git a/R/parsimony.R b/R/parsimony.R index af1ab421..4df93611 100644 --- a/R/parsimony.R +++ b/R/parsimony.R @@ -196,10 +196,29 @@ upperBound <- function(x, cost = NULL) { #' @param tree a phylogenetic tree, i.e. an object of class \code{phylo}. #' @param cost A cost matrix for the transitions between two states. #' @param sitewise return CI/RI for alignment or sitewise -#' +#' @returns a scalar or vector with the CI/RI vector. #' @seealso \code{\link{parsimony}}, \code{\link{pratchet}}, #' \code{\link{fitch}}, \code{\link{sankoff}}, \code{\link{bab}}, #' \code{\link{ancestral.pars}} +#' @examples +#' example(as.phylo.formula) +#' lab <- tr$tip.label +#' lab[79] <- "Herpestes fuscus" +#' tr$tip.label <- abbreviateGenus(lab) +#' X <- matrix(0, 112, 3, dimnames = list(tr$tip.label, c("Canis", "Panthera", +#' "Canis_Panthera"))) +#' desc_canis <- Descendants(tr, "Canis")[[1]] +#' desc_panthera <- Descendants(tr, "Panthera")[[1]] +#' X[desc_canis, c(1,3)] <- 1 +#' X[desc_panthera, c(2,3)] <- 1 +#' col <- rep("black", 112) +#' col[desc_panthera] <- "red" +#' col[desc_canis] <- "blue" +#' X <- phyDat(X, "USER", levels=c(0,1)) +#' plot(tr, "f", tip.color=col) +#' # The first two sites are homoplase free! +#' CI(tr, X, sitewise=TRUE) +#' RI(tr, X, sitewise=TRUE) #' #' @rdname CI #' @export diff --git a/R/phyDat.R b/R/phyDat.R index a85a316b..169b0d94 100644 --- a/R/phyDat.R +++ b/R/phyDat.R @@ -39,7 +39,7 @@ #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{DNAbin}}, \code{\link{as.DNAbin}}, #' \code{\link{baseFreq}}, \code{\link{glance.phyDat}}, \code{\link{dna2codon}}, -#' \code{\link{read.dna}}, \code{\link{read.aa}}, \code{\link{read.nexus.data}} +#' \code{\link{read.dna}}, \code{\link{read.nexus.data}} #' and the chapter 1 in the \code{vignette("AdvancedFeatures", #' package="phangorn")} and the example of \code{\link{pmlMix}} for the use of #' \code{\link{allSitePattern}}. diff --git a/R/phyDat_conversion.R b/R/phyDat_conversion.R index a98c046f..de664302 100644 --- a/R/phyDat_conversion.R +++ b/R/phyDat_conversion.R @@ -30,7 +30,7 @@ #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso [DNAbin()], [as.DNAbin()], #' \code{\link{baseFreq}}, \code{\link{glance.phyDat}}, -#' \code{\link{read.dna}}, \code{\link{read.aa}}, \code{\link{read.nexus.data}} +#' \code{\link{read.dna}}, \code{\link{read.nexus.data}} #' and the chapter 1 in the \code{vignette("phangorn-specials", #' package="phangorn")} and the example of \code{\link{pmlMix}} for the use of #' \code{allSitePattern} diff --git a/R/read.phyDat.R b/R/read.phyDat.R index bd11a28a..9649652f 100644 --- a/R/read.phyDat.R +++ b/R/read.phyDat.R @@ -35,7 +35,7 @@ read.fasta.user <- function (file, skip = 0, nlines = 0, #' #' \code{write.phyDat} calls the function \code{\link[ape]{write.dna}} or #' \code{\link[ape]{write.nexus.data}} and \code{read.phyDat} calls the function -#' \code{\link[ape]{read.dna}}, \code{read.aa} or \code{read.nexus.data}, so see +#' \code{\link[ape]{read.dna}} or \code{read.nexus.data}, so see #' for more details over there. #' #' You may import data directly with \code{\link[ape]{read.dna}} or diff --git a/man/CI.Rd b/man/CI.Rd index 13fb0b3d..15036ef6 100644 --- a/man/CI.Rd +++ b/man/CI.Rd @@ -18,6 +18,9 @@ RI(tree, data, cost = NULL, sitewise = FALSE) \item{sitewise}{return CI/RI for alignment or sitewise} } +\value{ +a scalar or vector with the CI/RI vector. +} \description{ \code{CI} and \code{RI} compute the Consistency Index (CI) and Retention Index (RI). @@ -28,6 +31,27 @@ divided by the number of changes required on the tree (parsimony score). The Consistency Index is equal to one if there is no homoplasy. And the Retention Index is defined as \deqn{RI = \frac{MaxChanges - ObsChanges}{MaxChanges - MinChanges}}{RI = (MaxChanges - ObsChanges) / (MaxChanges - MinChanges)} +} +\examples{ +example(as.phylo.formula) +lab <- tr$tip.label +lab[79] <- "Herpestes fuscus" +tr$tip.label <- abbreviateGenus(lab) +X <- matrix(0, 112, 3, dimnames = list(tr$tip.label, c("Canis", "Panthera", + "Canis_Panthera"))) +desc_canis <- Descendants(tr, "Canis")[[1]] +desc_panthera <- Descendants(tr, "Panthera")[[1]] +X[desc_canis, c(1,3)] <- 1 +X[desc_panthera, c(2,3)] <- 1 +col <- rep("black", 112) +col[desc_panthera] <- "red" +col[desc_canis] <- "blue" +X <- phyDat(X, "USER", levels=c(0,1)) +plot(tr, "f", tip.color=col) +# The first two sites are homoplase free! +CI(tr, X, sitewise=TRUE) +RI(tr, X, sitewise=TRUE) + } \seealso{ \code{\link{parsimony}}, \code{\link{pratchet}}, diff --git a/man/read.phyDat.Rd b/man/read.phyDat.Rd index 5d838b1c..aa4710d3 100644 --- a/man/read.phyDat.Rd +++ b/man/read.phyDat.Rd @@ -40,7 +40,7 @@ These functions read and write sequence alignments. \details{ \code{write.phyDat} calls the function \code{\link[ape]{write.dna}} or \code{\link[ape]{write.nexus.data}} and \code{read.phyDat} calls the function -\code{\link[ape]{read.dna}}, \code{read.aa} or \code{read.nexus.data}, so see +\code{\link[ape]{read.dna}} or \code{read.nexus.data}, so see for more details over there. You may import data directly with \code{\link[ape]{read.dna}} or From 4c7a8148753282be30c7d7b9222999133e9d7f6a Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Sun, 28 Apr 2024 18:49:30 +0200 Subject: [PATCH 169/216] small improvements --- R/plotAnc.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/plotAnc.R b/R/plotAnc.R index eff59327..28288e2f 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -111,15 +111,15 @@ plotAnc <- function(x, i = 1, col = NULL, xrad <- CEX * diff(par("usr")[1:2]) / 50 levels <- attr(data, "levels") nc <- attr(data, "nc") - if(is.null(scheme) & type=="AA") scheme="Ape_AA" - if(is.null(scheme) & type=="DNA") scheme="Ape_NT" + if(is.null(scheme) & type=="AA") scheme <- "Ape_AA" + if(is.null(scheme) & type=="DNA") scheme <- "Ape_NT" if(!is.null(scheme)){ scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", "Transmembrane_tendency", "Ape_NT", "RY_NT")) sc <- get(scheme, environment(ace)) if(has_gap_state(data) && attr(data, "type")=="AA"){ sc$properties <- c(sc$properties, Gap="-") - sc$color <- c(sc$color, "#000000") + sc$color <- c(sc$color, "#FFFFFF") } if(attr(data, "type")=="DNA"){ ind <- match("n", names(sc$properties)) @@ -168,19 +168,19 @@ plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT X <- df[subset & !is.na(subset),] # X2 <- subset(df, subset=Node==node) end <- min(end, nrow(X)) -# X <- X[start:end, , drop=FALSE] + X <- X[seq_len(end), , drop=FALSE] # creating the whole plot is slow X <- t(as.matrix(X[, -c(1:3)])) tmp <- gsub("p_", "", rownames(X)) lev <- rownames(X) <- toupper(tmp) - if(is.null(scheme) & type=="AA") scheme="Ape_AA" - if(is.null(scheme) & type=="DNA") scheme="Ape_NT" + if(is.null(scheme) & type=="AA") scheme <- "Ape_AA" + if(is.null(scheme) & type=="DNA") scheme <- "Ape_NT" if(!is.null(scheme)){ scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", "Transmembrane_tendency", "Ape_NT", "RY_NT")) sc <- get(scheme, environment(ace)) if(has_gap_state(x$data) && type=="AA"){ sc$properties <- c(sc$properties, Gap="-") - sc$color <- c(sc$color, "#000000") + sc$color <- c(sc$color, "#FFFFFF") } l <- lengths(sc$properties) SC <- make_col_scheme(chars = toupper(unlist(sc$properties)), From 5a77a8c7254af7e8351a6126c223ba5a4f04fbb4 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 29 Apr 2024 08:57:26 +0200 Subject: [PATCH 170/216] small improvements --- NAMESPACE | 2 +- R/Coalescent.R | 3 +-- R/gap_as_state.R | 6 ++++-- man/as.phyDat.Rd | 2 +- man/gap_as_state.Rd | 3 +++ man/phyDat.Rd | 2 +- 6 files changed, 11 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 6dc4220f..23630e8e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -168,6 +168,7 @@ export(glance) export(h2st) export(h4st) export(hadamard) +export(has_gap_state) export(hash) export(keep_as_tip) export(ldfactorial) @@ -219,7 +220,6 @@ export(rNNI) export(rSPR) export(random.addition) export(ratchet.control) -export(read.aa) export(read.nexus.dist) export(read.nexus.networx) export(read.nexus.partitions) diff --git a/R/Coalescent.R b/R/Coalescent.R index 2cff2f11..af63c2a1 100644 --- a/R/Coalescent.R +++ b/R/Coalescent.R @@ -98,8 +98,7 @@ coalSpeciesTree <- function(tree, X = NULL, sTree = NULL) { Labels = attr(X, "levels"), Size = l, class = "dist", Diag = FALSE, Upper = FALSE ) - - sTree <- upgma(dm, "single") + sTree <- as.phylo(hclust(dm, method = "single")) # dm of pairwise states } else { diff --git a/R/gap_as_state.R b/R/gap_as_state.R index f0ea845f..9acfae0a 100644 --- a/R/gap_as_state.R +++ b/R/gap_as_state.R @@ -36,7 +36,7 @@ gap_as_state <- function(obj, gap="-", ambiguous="?"){ attr(obj, "levels") <- levels attr(obj, "nc") <- attr(obj, "nc") + 1L attr(obj, "contrast") <- contrast - attr(obj, "gap_is_state") <- TRUE +# attr(obj, "gap_is_state") <- TRUE obj } @@ -58,11 +58,13 @@ gap_as_ambiguous <- function(obj, gap="-"){ attr(obj, "levels") <- levels[-ind] attr(obj, "contrast") <- contrast attr(obj, "nc") <- attr(obj, "nc") - 1L - attr(obj, "gap_is_state") <- FALSE +# attr(obj, "gap_is_state") <- FALSE obj } +#' @rdname gap_as_state +#' @export has_gap_state <- function(obj){ type <- attr(obj, "type") if(type=="DNA" && attr(obj, "nc")==5) return(TRUE) diff --git a/man/as.phyDat.Rd b/man/as.phyDat.Rd index 9e22c7d9..afba3199 100644 --- a/man/as.phyDat.Rd +++ b/man/as.phyDat.Rd @@ -113,7 +113,7 @@ all.equal(Laurasiatherian, as.phyDat(LauraDNAbin)) \seealso{ [DNAbin()], [as.DNAbin()], \code{\link{baseFreq}}, \code{\link{glance.phyDat}}, -\code{\link{read.dna}}, \code{\link{read.aa}}, \code{\link{read.nexus.data}} +\code{\link{read.dna}}, \code{\link{read.nexus.data}} and the chapter 1 in the \code{vignette("phangorn-specials", package="phangorn")} and the example of \code{\link{pmlMix}} for the use of \code{allSitePattern} diff --git a/man/gap_as_state.Rd b/man/gap_as_state.Rd index e5ea34b1..554fb274 100644 --- a/man/gap_as_state.Rd +++ b/man/gap_as_state.Rd @@ -3,11 +3,14 @@ \name{gap_as_state} \alias{gap_as_state} \alias{gap_as_ambiguous} +\alias{has_gap_state} \title{Treat gaps as a state} \usage{ gap_as_state(obj, gap = "-", ambiguous = "?") gap_as_ambiguous(obj, gap = "-") + +has_gap_state(obj) } \arguments{ \item{obj}{An object of class phyDat.} diff --git a/man/phyDat.Rd b/man/phyDat.Rd index 6aa47de2..922e7bd7 100644 --- a/man/phyDat.Rd +++ b/man/phyDat.Rd @@ -124,7 +124,7 @@ allSitePattern(5) \seealso{ \code{\link{DNAbin}}, \code{\link{as.DNAbin}}, \code{\link{baseFreq}}, \code{\link{glance.phyDat}}, \code{\link{dna2codon}}, -\code{\link{read.dna}}, \code{\link{read.aa}}, \code{\link{read.nexus.data}} +\code{\link{read.dna}}, \code{\link{read.nexus.data}} and the chapter 1 in the \code{vignette("AdvancedFeatures", package="phangorn")} and the example of \code{\link{pmlMix}} for the use of \code{\link{allSitePattern}}. From 8e0cbcfadd722323ca37caece700a2f848bb6928 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 29 Apr 2024 09:03:29 +0200 Subject: [PATCH 171/216] add tests --- inst/tinytest/test_phyDat.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/inst/tinytest/test_phyDat.R b/inst/tinytest/test_phyDat.R index 290e93ca..8102777d 100644 --- a/inst/tinytest/test_phyDat.R +++ b/inst/tinytest/test_phyDat.R @@ -131,3 +131,18 @@ map2 <- map_duplicates(Laurasiatherian) expect_null(map2) expect_true(inherits(map1, "data.frame")) + +# check gap as state + +Laurasiatherian_gap <- gap_as_state(Laurasiatherian) +chloroplast_gap <- gap_as_state(chloroplast) + +expect_false(has_gap_state(Laurasiatherian)) +expect_true(has_gap_state(Laurasiatherian_gap)) + +expect_false(has_gap_state(chloroplast)) +expect_true(has_gap_state(chloroplast_gap)) + +expect_equal(Laurasiatherian, gap_as_ambiguous(Laurasiatherian_gap)) +expect_equal(chloroplast, gap_as_ambiguous(chloroplast)) + From 91439ab236b7b367ab51e7ddceba2c31f3e26562 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 29 Apr 2024 09:04:31 +0200 Subject: [PATCH 172/216] update --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index ac50ef3d..418bb97e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -16,3 +16,4 @@ ^vignettes/MLbyHand_cache$ ^vignettes/AdvancedFeatures_cache$ ^vignettes/logo.png +^codemeta\.json$ From 9191d98b8d9ac4ad7a35c82751f55ab6a70ce8e4 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 29 Apr 2024 15:50:38 +0200 Subject: [PATCH 173/216] add examples --- R/Coalescent.R | 7 ++++++- R/ancestral.R | 14 ++++++++++++++ R/phyDat.R | 2 +- R/phylo.R | 10 +++++++++- man/coalSpeciesTree.Rd | 7 +++++++ man/pml.fit.Rd | 10 ++++++++++ man/write.ancestral.Rd | 13 +++++++++++++ 7 files changed, 60 insertions(+), 3 deletions(-) diff --git a/R/Coalescent.R b/R/Coalescent.R index af63c2a1..21925ad1 100644 --- a/R/Coalescent.R +++ b/R/Coalescent.R @@ -68,8 +68,13 @@ comp2 <- function(x, y) { #' @references Liu, L., Yu, L. and Pearl, D. K. (2010) Maximum tree: a #' consistent estimator of the species tree. \emph{Journal of Mathematical #' Biology}, \bold{60}, 95--106. +#' @examples +#' ## example in Liu et al. (2010) +#' tr1 <- read.tree(text = "(((B:0.05,C:0.05):0.01,D:0.06):0.04,A:0.1);") +#' tr2 <- read.tree(text = "(((A:0.07,C:0.07):0.02,D:0.09):0.03,B:0.12);") +#' TR <- c(tr1, tr2) +#' sp_tree <- coalSpeciesTree(TR) #' @keywords cluster -#' #' @export coalSpeciesTree <- function(tree, X = NULL, sTree = NULL) { if (is.null(X)) return(speciesTree(tree)) diff --git a/R/ancestral.R b/R/ancestral.R index 578f493b..6fd2b2c5 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -168,6 +168,7 @@ ancestral.pml <- function(object, type = "marginal", ...) { attributes(result2) <- attrib attributes(result) <- attrib result <- list2df_ancestral(result, result2) + result2 <- compress.phyDat(result2) erg <- list(tree=tree, data=data, prob=result, state=result2) class(erg) <- "ancestral" erg @@ -187,6 +188,18 @@ ancestral.pml <- function(object, type = "marginal", ...) { #' @param file a file name. File endings are added. #' @param ... Further arguments passed to or from other methods. #' @seealso \code{\link{ancestral.pml}}, \code{\link{plot.ancestral}} +#' @examples +#' data(Laurasiatherian) +#' fit <- pml_bb(Laurasiatherian[,1:100], "JC", rearrangement = "none") +#' anc_ml <- ancestral.pml(fit) +#' write.ancestral(anc_ml) +#' # Can be also results from iqtree +#' align <- read.phyDat("ancestral_align.fasta") +#' tree <- read.tree("ancestral_tree.nwk") +#' df <- read.table("ancestral.state", header=TRUE) +#' anc_ml_disc <- ancestral(tree, align, df) +#' plotAnc(anc_ml_disc, 20) +#' unlink(c("ancestral_align.fasta", "ancestral_tree.nwk", "ancestral.state")) #' @rdname write.ancestral #' @export write.ancestral <- function(x, file="ancestral"){ @@ -335,6 +348,7 @@ ancestral.pars <- function(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), # attributes(result2) <- attrib # attributes(result) <- attrib result <- list2df_ancestral(result, result2) + result2 <- compress.phyDat(result2) # needed? erg <- list(tree=tree, data=data, prob=result, state=result2) class(erg) <- "ancestral" erg diff --git a/R/phyDat.R b/R/phyDat.R index 169b0d94..64431b65 100644 --- a/R/phyDat.R +++ b/R/phyDat.R @@ -197,7 +197,7 @@ compress.phyDat <- function(data){ pos <- which(!duplicated(index)) weight <- tapply(attrib$weight, index, sum) names(weight) <- NULL - attrib$weight <- weight + attrib$weight <- as.vector(weight) if(is.null(attrib$index)) attrib$index <-index else attrib$index <- index[attrib$index] for(i in seq_len(length(data))) data[[i]] <- data[[i]][pos] diff --git a/R/phylo.R b/R/phylo.R index ce7dd58a..9f3fa581 100644 --- a/R/phylo.R +++ b/R/phylo.R @@ -1056,8 +1056,16 @@ pml.fit4 <- function(tree, data, bf = rep(1 / length(levels), length(levels)), #' @references Felsenstein, J. (1981) Evolutionary trees from DNA sequences: a #' maximum likelihood approach. \emph{Journal of Molecular Evolution}, #' \bold{17}, 368--376. +#' @examples +#' data(Laurasiatherian) +#' tree <- NJ(dist.ml(Laurasiatherian)) +#' bf <- rep(0.25, 4) +#' eig <- edQt() +#' pml.init(Laurasiatherian) +#' pml.fit(tree, Laurasiatherian, bf=bf, eig=eig) +#' pml.free() +#' pml(tree, Laurasiatherian) |> logLik() #' @keywords cluster -#' #' @rdname pml.fit #' @export pml.fit pml.fit <- function(tree, data, bf = rep(1 / length(levels), length(levels)), diff --git a/man/coalSpeciesTree.Rd b/man/coalSpeciesTree.Rd index 2e7e05c4..878be9ed 100644 --- a/man/coalSpeciesTree.Rd +++ b/man/coalSpeciesTree.Rd @@ -27,6 +27,13 @@ et al. (2010) from the element wise minima of the cophenetic matrices of the gene trees. It extends \code{speciesTree} in ape as it allows that have several individuals per gene tree. } +\examples{ +## example in Liu et al. (2010) +tr1 <- read.tree(text = "(((B:0.05,C:0.05):0.01,D:0.06):0.04,A:0.1);") +tr2 <- read.tree(text = "(((A:0.07,C:0.07):0.02,D:0.09):0.03,B:0.12);") +TR <- c(tr1, tr2) +sp_tree <- coalSpeciesTree(TR) +} \references{ Liu, L., Yu, L. and Pearl, D. K. (2010) Maximum tree: a consistent estimator of the species tree. \emph{Journal of Mathematical diff --git a/man/pml.fit.Rd b/man/pml.fit.Rd index 8a3df6a1..53cb2f44 100644 --- a/man/pml.fit.Rd +++ b/man/pml.fit.Rd @@ -80,6 +80,16 @@ the package coalescentMCMC, but are not intended for end user. Most of the functions call C code and are far less forgiving if the import is not what they expect than \code{pml}. } +\examples{ +data(Laurasiatherian) +tree <- NJ(dist.ml(Laurasiatherian)) +bf <- rep(0.25, 4) +eig <- edQt() +pml.init(Laurasiatherian) +pml.fit(tree, Laurasiatherian, bf=bf, eig=eig) +pml.free() +pml(tree, Laurasiatherian) |> logLik() +} \references{ Felsenstein, J. (1981) Evolutionary trees from DNA sequences: a maximum likelihood approach. \emph{Journal of Molecular Evolution}, diff --git a/man/write.ancestral.Rd b/man/write.ancestral.Rd index b474be9f..d50e2a26 100644 --- a/man/write.ancestral.Rd +++ b/man/write.ancestral.Rd @@ -36,6 +36,19 @@ alignment. \code{read.ancestral} tries to read in these files. This allows also to read in reconstruction made by iqtree to use the plotting capabilities of R. } +\examples{ +data(Laurasiatherian) +fit <- pml_bb(Laurasiatherian[,1:100], "JC", rearrangement = "none") +anc_ml <- ancestral.pml(fit) +write.ancestral(anc_ml) +# Can be also results from iqtree +align <- read.phyDat("ancestral_align.fasta") +tree <- read.tree("ancestral_tree.nwk") +df <- read.table("ancestral.state", header=TRUE) +anc_ml_disc <- ancestral(tree, align, df) +plotAnc(anc_ml_disc, 20) +unlink(c("ancestral_align.fasta", "ancestral_tree.nwk", "ancestral.state")) +} \seealso{ \code{\link{ancestral.pml}}, \code{\link{plot.ancestral}} } From 395a1690a03e94b867d030f86257e366ef5af417 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 29 Apr 2024 15:57:12 +0200 Subject: [PATCH 174/216] clean up --- R/pmlPart.R | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/R/pmlPart.R b/R/pmlPart.R index 1d59c3a9..7f42f4b7 100644 --- a/R/pmlPart.R +++ b/R/pmlPart.R @@ -226,28 +226,6 @@ makePart <- function(fit, rooted, weight = ~index + genes) { #' @rdname pmlPart #' @export -#multiphyDat2pmlPart <- function(x, rooted = FALSE, ...) { -# shared_tree <- TRUE -# if (shared_tree) { -# concatenate_x <- do.call(cbind.phyDat, x@seq) -# dm <- dist.ml(concatenate_x) -# if (!rooted) tree <- NJ(dm) -# else tree <- upgma(dm) -# } -# else tree <- NULL -# fun <- function(x, rooted = FALSE, tree, ...) { -# if (is.null(tree)) { -# dm <- dist.ml(x) -# if (!rooted) tree <- NJ(dm) -# else tree <- upgma(dm) -# } -# pml(tree, x, ...) -# } -# fits <- lapply(x@seq, fun, tree = tree, rooted = rooted, ...) -# fits -#} -# -# multiphyDat2pmlPart <- function(x, method="unrooted", tip.dates=NULL, ...) { if(is.list(x) && all(sapply(x,inherits, "phyDat"))) seq <- x else if(inherits(x, "multiphyDat")) seq <- x@seq From 729e8a49e4704aba8ac54b5a45e04f85066eb87e Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 29 Apr 2024 15:58:23 +0200 Subject: [PATCH 175/216] add tests --- inst/tinytest/test_ancestral.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/inst/tinytest/test_ancestral.R b/inst/tinytest/test_ancestral.R index 185c46d0..bd01eb41 100644 --- a/inst/tinytest/test_ancestral.R +++ b/inst/tinytest/test_ancestral.R @@ -27,3 +27,20 @@ test_acctran <- ancestral.pars(tree, dna, "ACCTRAN") #expect_equal(test_mpr_2[,1], test_acctran_2[,1], check.attributes = FALSE) + +data(Laurasiatherian) +fit <- pml_bb(Laurasiatherian[,1:100], "JC", rearrangement = "none") +anc_ml <- ancestral.pml(fit) +write.ancestral(anc_ml) +align <- read.phyDat("ancestral_align.fasta") +tree <- read.tree("ancestral_tree.nwk") +df <- read.table("ancestral.state", header=TRUE) +anc_ml_disc <- ancestral(tree, align, df) +expect_equal(anc_ml[[1]], anc_ml_disc[[1]]) +expect_equal(anc_ml[[2]], anc_ml_disc[[2]]) +expect_equal(anc_ml[[3]], anc_ml_disc[[3]]) +expect_equal(anc_ml[[4]], anc_ml_disc[[4]]) +unlink(c("ancestral_align.fasta", "ancestral_tree.nwk", "ancestral.state")) + + +## Felsenstein example From 0f27d82b8e7602ac498b958d0f5ae4768dfbe271 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 1 May 2024 19:31:28 +0200 Subject: [PATCH 176/216] improve man pages add return values --- R/Densi.R | 1 + R/ancestral.R | 2 ++ R/draw_CI.R | 4 ++-- R/gap_as_state.R | 2 +- R/image_phyDat.R | 1 + R/networx.R | 3 +++ man/add_ci.Rd | 3 +++ man/add_edge_length.Rd | 3 +++ man/as.networx.Rd | 3 +++ man/densiTree.Rd | 3 +++ man/image.phyDat.Rd | 3 +++ man/plot.networx.Rd | 4 ++++ 12 files changed, 29 insertions(+), 3 deletions(-) diff --git a/R/Densi.R b/R/Densi.R index 8a7fa78f..b566d846 100644 --- a/R/Densi.R +++ b/R/Densi.R @@ -95,6 +95,7 @@ add_tiplabels <- function(xy, tip.label, direction, adj, font, srt = 0, cex = 1, #' @param xlim the x limits of the plot. #' @param ylim the y limits of the plot. #' @param \dots further arguments to be passed to plot. +#' @returns \code{densiTree} returns silently a list with graphics parameter. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{plot.phylo}}, \code{\link{plot.networx}}, #' \code{\link{jitter}}, \code{\link{rtt}} diff --git a/R/ancestral.R b/R/ancestral.R index 6fd2b2c5..74897258 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -187,6 +187,7 @@ ancestral.pml <- function(object, type = "marginal", ...) { #' @param x an object of class ancestral. #' @param file a file name. File endings are added. #' @param ... Further arguments passed to or from other methods. +#' @returns \code{write.ancestral} returns the input x invisibly. #' @seealso \code{\link{ancestral.pml}}, \code{\link{plot.ancestral}} #' @examples #' data(Laurasiatherian) @@ -208,6 +209,7 @@ write.ancestral <- function(x, file="ancestral"){ write.table(x$prob, file=paste0(file, ".state"), quote=FALSE, row.names=FALSE, sep="\t") write.tree(x$tree, file=paste0(file, "_tree.nwk")) + invisible(x) } diff --git a/R/draw_CI.R b/R/draw_CI.R index ef1a71fb..27b4dad3 100644 --- a/R/draw_CI.R +++ b/R/draw_CI.R @@ -46,7 +46,7 @@ edge_length_matrix <- function(tree, trees, rooted=TRUE){ ##' @param rooted rooted logical, if FALSE edge lengths is a function of the ##' observed splits, if TRUE edge lengths are estimated from height for the ##' observed clades. -##' @return NULL +##' @return The tree with newly assigned edge length. ##' @author Klaus Schliep ##' @importFrom graphics legend rect ##' @examples @@ -94,7 +94,7 @@ add_edge_length <- function(tree, trees, fun=\(x)median(na.omit(x)), ##' @param \dots arguments passed to other functions, \code{\link{legend}} or ##' \code{\link{bxp}}. ##' @details All trees should to be rooted, either ultrametric or tip dated. -##' @return NULL +##' @return Nothing. Function is called for adding to a plot. ##' @author Emmanuel Paradis, Santiago Claramunt, Joseph Brown, Klaus Schliep ##' @importFrom graphics legend rect bxp boxplot ##' @importFrom stats median diff --git a/R/gap_as_state.R b/R/gap_as_state.R index 9acfae0a..f71fdb91 100644 --- a/R/gap_as_state.R +++ b/R/gap_as_state.R @@ -96,5 +96,5 @@ remove_similar <- function(x, k=3, index=FALSE){ ind_dist <- ind_dist[ind_dist[,1] < ind_dist[,2], ] dist_i <- unique(ind_dist[, 2]) if(index) return(dist_i) - x[-dist_i] + x[-dist_i, ] } diff --git a/R/image_phyDat.R b/R/image_phyDat.R index d636fe81..8a686477 100644 --- a/R/image_phyDat.R +++ b/R/image_phyDat.R @@ -6,6 +6,7 @@ #' Codons triplets are handled as nucleotide sequences. #' @param x an object containing sequences, an object of class \code{phyDat}. #' @param ... further arguments passed to or from other methods. +#' @returns Nothing. The function is called for plotting. #' @seealso \code{\link{image.DNAbin}}, \code{\link{image.AAbin}} #' @examples #' data("chloroplast") diff --git a/R/networx.R b/R/networx.R index a51817ea..f67e3b39 100644 --- a/R/networx.R +++ b/R/networx.R @@ -266,6 +266,7 @@ circNetwork <- function(x, ord = NULL) { #' splits (may excludes splits). #' @param coord add coordinates of the nodes, allows to reproduce the plot. #' @param \dots Further arguments passed to or from other methods. +#' @returns an object of class \code{networx}. #' @note The internal representation is likely to change. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{consensusNet}}, \code{\link{neighborNet}}, @@ -649,6 +650,8 @@ rotate_matrix <- function(x, theta){ #' @param digits if edge labels are numerical a positive integer indicating how #' many significant digits are to be used. #' @param \dots Further arguments passed to or from other methods. +#' @returns \code{plot.networx} returns invisibly a list with paramters of the +#' plot. #' @rdname plot.networx #' @note The internal representation is likely to change. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} diff --git a/man/add_ci.Rd b/man/add_ci.Rd index c5ac6b5f..e449716f 100644 --- a/man/add_ci.Rd +++ b/man/add_ci.Rd @@ -28,6 +28,9 @@ blue.} \item{\dots}{arguments passed to other functions, \code{\link{legend}} or \code{\link{bxp}}.} } +\value{ +Nothing. Function is called for adding to a plot. +} \description{ These are low-level plotting commands to draw the confidence intervals on the node of a tree as rectangles with coloured backgrounds or diff --git a/man/add_edge_length.Rd b/man/add_edge_length.Rd index 337e6a90..424cba2a 100644 --- a/man/add_edge_length.Rd +++ b/man/add_edge_length.Rd @@ -20,6 +20,9 @@ is computed from.} observed splits, if TRUE edge lengths are estimated from height for the observed clades.} } +\value{ +The tree with newly assigned edge length. +} \description{ This command can infer some average edge lengths and assign them from a (bootstrap/MCMC) sample. diff --git a/man/as.networx.Rd b/man/as.networx.Rd index 7accd4f9..18b1f0a6 100644 --- a/man/as.networx.Rd +++ b/man/as.networx.Rd @@ -23,6 +23,9 @@ splits (may excludes splits).} \item{coord}{add coordinates of the nodes, allows to reproduce the plot.} } +\value{ +an object of class \code{networx}. +} \description{ \code{as.networx} convert \code{splits} objects into a \code{networx} object. And most important there exists a generic \code{plot} function to diff --git a/man/densiTree.Rd b/man/densiTree.Rd index 7dd4a55a..ec0bbe7f 100644 --- a/man/densiTree.Rd +++ b/man/densiTree.Rd @@ -70,6 +70,9 @@ jitter and random or equally spaced (see details below)} \item{\dots}{further arguments to be passed to plot.} } +\value{ +\code{densiTree} returns silently a list with graphics parameter. +} \description{ An R function to plot trees similar to those produced by DensiTree. } diff --git a/man/image.phyDat.Rd b/man/image.phyDat.Rd index 73adb0bf..637c9e1b 100644 --- a/man/image.phyDat.Rd +++ b/man/image.phyDat.Rd @@ -11,6 +11,9 @@ \item{...}{further arguments passed to or from other methods.} } +\value{ +Nothing. The function is called for plotting. +} \description{ This function plots an image of an alignment of sequences. } diff --git a/man/plot.networx.Rd b/man/plot.networx.Rd index e29fb925..d740627c 100644 --- a/man/plot.networx.Rd +++ b/man/plot.networx.Rd @@ -83,6 +83,10 @@ many significant digits are to be used.} \item{\dots}{Further arguments passed to or from other methods.} } +\value{ +\code{plot.networx} returns invisibly a list with paramters of the +plot. +} \description{ So far not all parameters behave the same on the the \code{rgl} \code{"3D"} and basic graphic \code{"2D"} device. From 884d31bf33ccfcfba7956918d75716bf4a2b5f7b Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 3 May 2024 21:08:31 +0200 Subject: [PATCH 177/216] =?UTF-8?q?change=20to=20make=5Fgraph,=20pointed?= =?UTF-8?q?=20out=20by=20Szabolcs=20Horv=C3=A1t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NAMESPACE | 2 +- R/networx.R | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 23630e8e..75b90bf4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -295,9 +295,9 @@ importFrom(graphics,title) importFrom(igraph,E) importFrom(igraph,all_shortest_paths) importFrom(igraph,decompose) -importFrom(igraph,graph) importFrom(igraph,graph_from_adjacency_matrix) importFrom(igraph,layout_nicely) +importFrom(igraph,make_graph) importFrom(igraph,shortest_paths) importFrom(igraph,topo_sort) importFrom(igraph,vcount) diff --git a/R/networx.R b/R/networx.R index f67e3b39..0747a5d5 100644 --- a/R/networx.R +++ b/R/networx.R @@ -85,7 +85,7 @@ addEdge <- function(network, desc, spl) { fromTo <- intersect(attr(desc, "cycle"), split[[1]]) fromTo <- parent[match(fromTo, child)] - g <- graph(t(edge), directed = FALSE) + g <- make_graph(t(edge), directed = FALSE) ind <- NULL for (i in 2:length(fromTo)) { d <- all_shortest_paths(g, fromTo[i - 1], fromTo[i])$res @@ -177,7 +177,7 @@ circNetwork <- function(x, ord = NULL) { fromTo <- ordStart:ordStop if (ordStart > ordStop) fromTo <- c(ordStart:nTips, 1:ordStop) fromTo <- ord[fromTo] - g <- graph(t(res$edge), directed = FALSE) + g <- make_graph(t(res$edge), directed = FALSE) isChild <- (rsY == (Y %*% X[k, ]))[index] sp2 <- NULL @@ -278,7 +278,7 @@ circNetwork <- function(x, ord = NULL) { #' Intertwining phylogenetic trees and networks. \emph{Methods Ecol Evol}. #' \bold{8}, 1212--1220. doi:10.1111/2041-210X.12760 #' @keywords plot -#' @importFrom igraph graph +#' @importFrom igraph make_graph #' @examples #' #' set.seed(1) @@ -415,7 +415,7 @@ as.networx.phylo <- function(x, ...) { # as.igraph.networx <- function(x, directed=FALSE){ -# graph(t(x$edge), directed=directed) +# make_graph(t(x$edge), directed=directed) # } @@ -425,7 +425,7 @@ reorder.networx <- function(x, order = "cladewise", index.only = FALSE, ...) { if (!is.null(attr(x, "order"))) if (attr(x, "order") == order) return(x) - g <- graph(t(x$edge)) + g <- make_graph(t(x$edge)) if (order == "cladewise") neword <- topo_sort(g, "out") else neword <- topo_sort(g, "in") neworder <- order(match(x$edge[, 1], neword)) @@ -509,9 +509,6 @@ coords <- function(obj, dim = "3D") { adj <- spMatrix(n, n, i = obj$edge[, 2], j = obj$edge[, 1], x = rep(1, length(obj$edge.length))) g <- graph_from_adjacency_matrix(adj, "undirected") - # add this - # g2 <- graph(t(obj$edge), directed=FALSE) - # g2 <- set.edge.attribute(g, "weight", value=rep(1, nrow(obj$edge)) if (dim == "3D") { coord <- layout_nicely(g, dim = 3) k <- matrix(0, max(obj$splitIndex), 3) @@ -668,7 +665,7 @@ rotate_matrix <- function(x, theta){ #' Intertwining phylogenetic trees and networks. \emph{Methods Ecol Evol}. #' \bold{8}, 1212--1220. doi:10.1111/2041-210X.12760 #' @keywords plot -#' @importFrom igraph graph +#' @importFrom igraph make_graph #' @examples #' #' set.seed(1) From a025619a39264cc86ad36ba58f4a4f59bf8536c9 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 10 May 2024 18:22:36 +0200 Subject: [PATCH 178/216] Improve man pages --- R/draw_CI.R | 2 +- R/maxCladeCred.R | 20 ++++++++++++++++---- man/add_ci.Rd | 2 +- man/maxCladeCred.Rd | 22 +++++++++++++++++----- man/write.ancestral.Rd | 3 +++ 5 files changed, 38 insertions(+), 11 deletions(-) diff --git a/R/draw_CI.R b/R/draw_CI.R index 27b4dad3..06b89042 100644 --- a/R/draw_CI.R +++ b/R/draw_CI.R @@ -110,7 +110,7 @@ add_edge_length <- function(tree, trees, fun=\(x)median(na.omit(x)), ##' plot(tree, direction="downwards") ##' add_boxplot(tree, trees, boxwex=.7) ##' @seealso \code{\link{plot.phylo}}, \code{\link{plotBS}}, -##' \code{\link{add_edge_length}} +##' \code{\link{add_edge_length}}, \code{\link{maxCladeCred}} ##' @keywords aplot ##' @rdname add_ci ##' @export diff --git a/R/maxCladeCred.R b/R/maxCladeCred.R index 79ef2ead..5aba0825 100644 --- a/R/maxCladeCred.R +++ b/R/maxCladeCred.R @@ -2,15 +2,18 @@ #' #' \code{maxCladeCred} computes the maximum clade credibility tree from a #' sample of trees. -#' #' So far just the best tree is returned. No annotations or transformations of -#' edge length are performed. +#' edge length are performed and the edge length are taken from the tree. #' #' If a list of partition is provided then the clade credibility is computed #' for the trees in x. #' #' \code{allCompat} returns a 50\% majority rule consensus tree with added -#' compatible splits similar to the option allcompat in MrBayes. +#' compatible splits similar to the option allcompat in MrBayes. This tree has +#' no edge length. +#' +#' \code{\link{add_edge_length}} can be used to add edge lengths computed from +#' the sample of trees. #' #' @param x \code{x} is an object of class \code{multiPhylo} or \code{phylo} #' @param tree logical indicating whether return the tree with the clade @@ -23,7 +26,8 @@ #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{consensus}}, \code{\link{consensusNet}}, #' \code{\link{prop.part}}, \code{\link{bootstrap.pml}}, \code{\link{plotBS}}, -#' \code{\link{transferBootstrap}}, \code{\link{add_edge_length}} +#' \code{\link{transferBootstrap}}, \code{\link{add_edge_length}}, +#' \code{\link{add_boxplot}} #' @keywords cluster #' @importFrom fastmatch fmatch #' @examples @@ -45,6 +49,14 @@ #' plot(majority_consensus, main="Majority consensus tree") #' plot(all_compat, main="Majority consensus tree with compatible splits") #' plot(max_clade_cred, main="Maximum clade credibility tree") +#' +#' par(mfrow = c(2,1)) +#' plot(max_clade_cred, main="Edge length from tree") +#' add_boxplot(max_clade_cred, bs) +#' max_clade_cred_2 <- add_edgelength(max_clade_cred, bs) +#' plot(max_clade_cred_2, main="Edge length computed from sample") +#' add_boxplot(max_clade_cred_2, bs) +#' #' par(old.par) #' #' # compute clade credibility for trees given a prop.part object diff --git a/man/add_ci.Rd b/man/add_ci.Rd index e449716f..9a3c8535 100644 --- a/man/add_ci.Rd +++ b/man/add_ci.Rd @@ -53,7 +53,7 @@ add_boxplot(tree, trees, boxwex=.7) } \seealso{ \code{\link{plot.phylo}}, \code{\link{plotBS}}, -\code{\link{add_edge_length}} +\code{\link{add_edge_length}}, \code{\link{maxCladeCred}} } \author{ Emmanuel Paradis, Santiago Claramunt, Joseph Brown, Klaus Schliep diff --git a/man/maxCladeCred.Rd b/man/maxCladeCred.Rd index 62582c94..a4fa81f9 100644 --- a/man/maxCladeCred.Rd +++ b/man/maxCladeCred.Rd @@ -30,16 +30,19 @@ credibility or a numeric vector of clade credibilities for each tree. \description{ \code{maxCladeCred} computes the maximum clade credibility tree from a sample of trees. +So far just the best tree is returned. No annotations or transformations of +edge length are performed and the edge length are taken from the tree. } \details{ -So far just the best tree is returned. No annotations or transformations of -edge length are performed. - If a list of partition is provided then the clade credibility is computed for the trees in x. \code{allCompat} returns a 50\% majority rule consensus tree with added -compatible splits similar to the option allcompat in MrBayes. +compatible splits similar to the option allcompat in MrBayes. This tree has +no edge length. + +\code{\link{add_edge_length}} can be used to add edge lengths computed from +the sample of trees. } \examples{ @@ -60,6 +63,14 @@ plot(strict_consensus, main="Strict consensus tree") plot(majority_consensus, main="Majority consensus tree") plot(all_compat, main="Majority consensus tree with compatible splits") plot(max_clade_cred, main="Maximum clade credibility tree") + +par(mfrow = c(2,1)) +plot(max_clade_cred, main="Edge length from tree") +add_boxplot(max_clade_cred, bs) +max_clade_cred_2 <- add_edgelength(max_clade_cred, bs) +plot(max_clade_cred_2, main="Edge length computed from sample") +add_boxplot(max_clade_cred_2, bs) + par(old.par) # compute clade credibility for trees given a prop.part object @@ -72,7 +83,8 @@ maxCladeCred(c(tree, bs[[1]]), tree=FALSE, part = pp) \seealso{ \code{\link{consensus}}, \code{\link{consensusNet}}, \code{\link{prop.part}}, \code{\link{bootstrap.pml}}, \code{\link{plotBS}}, -\code{\link{transferBootstrap}}, \code{\link{add_edge_length}} +\code{\link{transferBootstrap}}, \code{\link{add_edge_length}}, +\code{\link{add_boxplot}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/write.ancestral.Rd b/man/write.ancestral.Rd index d50e2a26..767e55aa 100644 --- a/man/write.ancestral.Rd +++ b/man/write.ancestral.Rd @@ -26,6 +26,9 @@ each state and site.} \item{...}{Further arguments passed to or from other methods.} } +\value{ +\code{write.ancestral} returns the input x invisibly. +} \description{ \code{write.ancestral} allows to export ancestral reconstructions. It writes out the tree, a tab delimited text file with the probabilities and the From 1671ae35cae00edf921ee895ca0306eac749d238 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 10 May 2024 18:59:49 +0200 Subject: [PATCH 179/216] bugfix --- R/maxCladeCred.R | 2 +- man/maxCladeCred.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/maxCladeCred.R b/R/maxCladeCred.R index 5aba0825..42a3a0c4 100644 --- a/R/maxCladeCred.R +++ b/R/maxCladeCred.R @@ -53,7 +53,7 @@ #' par(mfrow = c(2,1)) #' plot(max_clade_cred, main="Edge length from tree") #' add_boxplot(max_clade_cred, bs) -#' max_clade_cred_2 <- add_edgelength(max_clade_cred, bs) +#' max_clade_cred_2 <- add_edge_length(max_clade_cred, bs) #' plot(max_clade_cred_2, main="Edge length computed from sample") #' add_boxplot(max_clade_cred_2, bs) #' diff --git a/man/maxCladeCred.Rd b/man/maxCladeCred.Rd index a4fa81f9..1de7ead8 100644 --- a/man/maxCladeCred.Rd +++ b/man/maxCladeCred.Rd @@ -67,7 +67,7 @@ plot(max_clade_cred, main="Maximum clade credibility tree") par(mfrow = c(2,1)) plot(max_clade_cred, main="Edge length from tree") add_boxplot(max_clade_cred, bs) -max_clade_cred_2 <- add_edgelength(max_clade_cred, bs) +max_clade_cred_2 <- add_edge_length(max_clade_cred, bs) plot(max_clade_cred_2, main="Edge length computed from sample") add_boxplot(max_clade_cred_2, bs) From 82dfdbba371e16aeed3be3b440b581d9450aaf9c Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 17 May 2024 09:36:18 +0200 Subject: [PATCH 180/216] avoid warning --- R/plot_pml.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/plot_pml.R b/R/plot_pml.R index 0c29fcf0..553eb864 100644 --- a/R/plot_pml.R +++ b/R/plot_pml.R @@ -59,7 +59,8 @@ plot.pml <- function(x, type="phylogram", direction = "rightwards", ...){ axisPhylo(side, root.time = root_time, backward = FALSE, cex.axis=cex.axis) } - else if(x$method=="ultrametric") axisPhylo(side, cex.axis=cex.axis) + else if(!is.null(x$method) && x$method=="ultrametric") + axisPhylo(side, cex.axis=cex.axis) else add.scale.bar(cex=cex) } else add.scale.bar(cex=cex) From ca442e56f11261b9b0cb0640dcf937ea64f8b6a0 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 17 May 2024 17:32:06 +0200 Subject: [PATCH 181/216] add some additional test infrastructure --- DESCRIPTION | 3 +++ 1 file changed, 3 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 4593a713..c5c56062 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -64,7 +64,9 @@ Suggests: rgl, rmarkdown, seqinr, + testthat (>= 3.0.0), tinytest, + vdiffr, xtable LinkingTo: Rcpp @@ -78,3 +80,4 @@ Repository: CRAN Roxygen: list(old_usage = TRUE) RoxygenNote: 7.3.1 Language: en-US +Config/testthat/edition: 3 From 83b39c860b1167d42d9ec59023fe6e281476d689 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 17 May 2024 17:32:45 +0200 Subject: [PATCH 182/216] updates --- inst/tinytest/test_dist_tree.R | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/inst/tinytest/test_dist_tree.R b/inst/tinytest/test_dist_tree.R index a18e67fd..e99cf1f9 100644 --- a/inst/tinytest/test_dist_tree.R +++ b/inst/tinytest/test_dist_tree.R @@ -2,17 +2,25 @@ data(Laurasiatherian) dm <- dist.ml(Laurasiatherian) # check nnls functions - tree_nj <- NJ(dm) - tree_unj <- UNJ(dm) - tree_nnls_unj <- nnls.phylo(tree_unj, dm) - tree_nnls_nj <- nnls.phylo(tree_nj, dm) +tree_nj <- NJ(dm) +tree_unj <- UNJ(dm) +tree_nnls_unj <- nnls.phylo(tree_unj, dm) +tree_nnls_nj <- nnls.phylo(tree_nj, dm) - tree_upgma <- upgma(dm) - tree_wpgma <- wpgma(dm) - tree_nnls_upgma <- nnls.phylo(tree_upgma, dm, method="ultrametric") - tree_nnls_wpgma <- nnls.phylo(tree_wpgma, dm, method="ultrametric") +tree_upgma <- upgma(dm) +tree_wpgma <- wpgma(dm) +tree_nnls_upgma <- nnls.phylo(tree_upgma, dm, method="ultrametric") +tree_nnls_wpgma <- nnls.phylo(tree_wpgma, dm, method="ultrametric") + +expect_equal(tree_upgma, tree_nnls_upgma) +expect_false(all.equal(tree_wpgma, tree_nnls_wpgma)) +expect_equal(tree_unj, tree_nnls_unj) +expect_false(all.equal(tree_nj, tree_nnls_nj)) + +# Test NNI +# tree_upgma_no_nni <- upgma(dm, NNI=FALSE) +# tree_upgma_nni <- upgma(dm, NNI=TRUE) + +# expect_true(is.ultrametric(tree_upgma_nni)) +# expect_true(sum(tree_upgma_no_nni$edge.length)>=sum(tree_upgma_nni$edge.length)) - expect_equal(tree_upgma, tree_nnls_upgma) - expect_false(all.equal(tree_wpgma, tree_nnls_wpgma)) -# expect_equal(tree_unj, tree_nnls_unj) - expect_false(all.equal(tree_nj, tree_nnls_nj)) From 447c641cb5181701133aef78b5a34d6b7415d3e5 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 17 May 2024 17:33:03 +0200 Subject: [PATCH 183/216] add tests --- inst/tinytest/test_parsimony.R | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/inst/tinytest/test_parsimony.R b/inst/tinytest/test_parsimony.R index 42847207..68f76378 100644 --- a/inst/tinytest/test_parsimony.R +++ b/inst/tinytest/test_parsimony.R @@ -72,7 +72,6 @@ tree2 <- acctran(tree2, dat) expect_equal(sum(tree2$edge.length), fitch(tree2,dat)) - # test random.addition ra_tree <- random.addition(yeast) ratchet_tree <- pratchet(yeast, start=ra_tree, trace=0) @@ -80,3 +79,34 @@ expect_true(attr(ra_tree, "pscore") >= attr(ratchet_tree, "pscore")) trivial_tree <- pratchet(dat, trace=0, all=FALSE, minit = 10, maxit = 20) expect_true(inherits(trivial_tree, "phylo")) + +# test CI / RI +data(carnivora) +frm <- ~SuperFamily/Family/Genus/Species +tr <- as.phylo(frm, data = carnivora, collapse=FALSE) +tr$edge.length <- rep(1, nrow(tr$edge)) + +X <- matrix(0, 112, 7, dimnames = list(tr$tip.label, c("Canidae", "Felidae", + "Ursidae", "Canidae_Felidae", "Canidae_Ursidae", "Felidae_Ursidae", + "Canidae_Felidae_Ursidae"))) +desc_canidae <- Descendants(tr, "Canidae")[[1]] +desc_felidae <- Descendants(tr, "Felidae")[[1]] +desc_ursidae <- Descendants(tr, "Ursidae")[[1]] + +X[desc_canidae, c(1,4,5,7)] <- 1 +X[desc_felidae, c(2,4,6,7)] <- 1 +X[desc_ursidae, c(3,5,6,7)] <- 1 + +X <- phyDat(X, "USER", levels=c(0,1)) + +#col <- rep("black", 112) +#col[desc_felidae] <- "red" +#col[desc_canidae] <- "blue" +#col[desc_ursidae] <- "green" +#plot(tr, "f", tip.color=col, show.node=TRUE) + +ci1 <- CI(tr, X, sitewise = TRUE) +ri1 <- RI(tr, X, sitewise = TRUE) +expect_true(all(ci1[1:3]==1)) +expect_true(all(ri1[1:3]==1)) + From 87902a60c329c607fcb81832ed95acd20cb7ab85 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 17 May 2024 17:39:04 +0200 Subject: [PATCH 184/216] remove plot.ancestral for now --- NAMESPACE | 3 +-- R/plotAnc.R | 30 +++++++++++++++--------------- man/plot.ancestral.Rd | 19 ++++++------------- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 75b90bf4..8dc8ef23 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,6 +43,7 @@ S3method(c,splits) S3method(cbind,phyDat) S3method(cophenetic,networx) S3method(cophenetic,splits) +S3method(distinct,splits) S3method(glance,phyDat) S3method(glance,pml) S3method(glance,pmlMix) @@ -55,7 +56,6 @@ S3method(logLik,pmlMix) S3method(logLik,pmlPart) S3method(midpoint,multiPhylo) S3method(midpoint,phylo) -S3method(plot,ancestral) S3method(plot,codonTest) S3method(plot,networx) S3method(plot,pml) @@ -149,7 +149,6 @@ export(dist.logDet) export(dist.ml) export(dist.p) export(distanceHadamard) -export(distinct.splits) export(diversity) export(dna2aa) export(dna2codon) diff --git a/R/plotAnc.R b/R/plotAnc.R index 28288e2f..b725bcd1 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -34,7 +34,7 @@ getAncDF <- function(x){ #' @param x an object of class \code{ancestral}. ## @param site.pattern logical, plot i-th site pattern or i-th site #' @param i,site plots the i-th site. -#' @param which either "pie" or "seqlogo" +## @param which either "pie" or "seqlogo" #' @param node to plot for which the probabilities should be plotted. #' @param start start position to plot. #' @param end end position to plot. @@ -70,20 +70,6 @@ getAncDF <- function(x){ #' plotSeqLogo(anc.ch, node="Node1", 1, 25, scheme="Clustal") #' @importFrom grDevices hcl.colors #' @importFrom ggseqlogo make_col_scheme ggseqlogo -#' @rdname plot.ancestral -#' @export -plot.ancestral <- function(x, which = c("pie", "seqlogo"), site = 1, - node=getRoot(x$tree), col = NULL, cex.pie = .5, pos = "bottomright", - scheme=NULL, start=1, end=10, ...){ - stopifnot(inherits(x, "ancestral")) - which <- match.arg(which, c("pie", "seqlogo"), TRUE) - if(which=="pie")plotAnc(x, i = site, col = col, cex.pie = cex.pie, pos = pos, - scheme=scheme, ...) - if(which=="seqlogo")plotSeqLogo(x, node, start=start, end=end, scheme=scheme, ...) -} - - - #' @rdname plot.ancestral #' @export plotAnc <- function(x, i = 1, col = NULL, @@ -204,4 +190,18 @@ plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT # image(tmp, ...) #} +##' @rdname plot.ancestral +##' @export +#plot.ancestral <- function(x, which = c("pie", "seqlogo"), site = 1, +# node=getRoot(x$tree), col = NULL, cex.pie = .5, pos = "bottomright", +# scheme=NULL, start=1, end=10, ...){ +# stopifnot(inherits(x, "ancestral")) +# which <- match.arg(which, c("pie", "seqlogo"), TRUE) +# if(which=="pie")plotAnc(x, i = site, col = col, cex.pie = cex.pie, pos = pos, +# scheme=scheme, ...) +# if(which=="seqlogo")plotSeqLogo(x, node, start=start, end=end, scheme=scheme, ...) +#} + + + diff --git a/man/plot.ancestral.Rd b/man/plot.ancestral.Rd index 4b6d6943..858afec0 100644 --- a/man/plot.ancestral.Rd +++ b/man/plot.ancestral.Rd @@ -1,15 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotAnc.R -\name{plot.ancestral} -\alias{plot.ancestral} +\name{plotAnc} \alias{plotAnc} \alias{plotSeqLogo} \title{Plot ancestral character on a tree} \usage{ -\method{plot}{ancestral}(x, which = c("pie", "seqlogo"), site = 1, - node = getRoot(x$tree), col = NULL, cex.pie = 0.5, - pos = "bottomright", scheme = NULL, start = 1, end = 10, ...) - plotAnc(x, i = 1, col = NULL, cex.pie = 0.5, pos = "bottomright", scheme = NULL, ...) @@ -19,9 +14,7 @@ plotSeqLogo(x, node = getRoot(x$tree), start = 1, end = 10, \arguments{ \item{x}{an object of class \code{ancestral}.} -\item{which}{either "pie" or "seqlogo"} - -\item{node}{to plot for which the probabilities should be plotted.} +\item{i, site}{plots the i-th site.} \item{col}{a vector containing the colors for all possible states.} @@ -33,13 +26,13 @@ plotSeqLogo(x, node = getRoot(x$tree), start = 1, end = 10, "Zappo_AA", "Clustal", "Polarity" and "Transmembrane_tendency", for nucleotides "Ape_NT" and"RY_NT". Names can be abbreviated.} -\item{start}{start position to plot.} +\item{\dots}{Further arguments passed to or from other methods.} -\item{end}{end position to plot.} +\item{node}{to plot for which the probabilities should be plotted.} -\item{\dots}{Further arguments passed to or from other methods.} +\item{start}{start position to plot.} -\item{i, site}{plots the i-th site.} +\item{end}{end position to plot.} } \description{ \code{plotAnc} plots a phylogeny and adds character to the nodes. Either From 01934aaf0a199ff30ead36dcea66b1358ec4af57 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 17 May 2024 17:41:06 +0200 Subject: [PATCH 185/216] add tests for some plots --- tests/testthat.R | 12 ++++ .../_snaps/plot_ancestral/pie-plots.svg | 72 +++++++++++++++++++ .../_snaps/plot_ancestral/seqlogo.svg | 0 .../_snaps/plot_networx/plot-networx.svg | 54 ++++++++++++++ tests/testthat/test_plot_ancestral.R | 22 ++++++ tests/testthat/test_plot_networx.R | 6 ++ tests/testthat/test_plot_pml.R | 13 ++++ 7 files changed, 179 insertions(+) create mode 100644 tests/testthat.R create mode 100644 tests/testthat/_snaps/plot_ancestral/pie-plots.svg create mode 100644 tests/testthat/_snaps/plot_ancestral/seqlogo.svg create mode 100644 tests/testthat/_snaps/plot_networx/plot-networx.svg create mode 100644 tests/testthat/test_plot_ancestral.R create mode 100644 tests/testthat/test_plot_networx.R create mode 100644 tests/testthat/test_plot_pml.R diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 00000000..06677bb2 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + +library(testthat) +library(phangorn) + +test_check("phangorn") diff --git a/tests/testthat/_snaps/plot_ancestral/pie-plots.svg b/tests/testthat/_snaps/plot_ancestral/pie-plots.svg new file mode 100644 index 00000000..e75f471b --- /dev/null +++ b/tests/testthat/_snaps/plot_ancestral/pie-plots.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +t1 +t2 +t3 +t4 + + + + + + + + + + + + + + + + + + + + + + + + + +a +g +c +t +- + + diff --git a/tests/testthat/_snaps/plot_ancestral/seqlogo.svg b/tests/testthat/_snaps/plot_ancestral/seqlogo.svg new file mode 100644 index 00000000..e69de29b diff --git a/tests/testthat/_snaps/plot_networx/plot-networx.svg b/tests/testthat/_snaps/plot_networx/plot-networx.svg new file mode 100644 index 00000000..936397ca --- /dev/null +++ b/tests/testthat/_snaps/plot_networx/plot-networx.svg @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +1 +2 +3 +4 +5 + + diff --git a/tests/testthat/test_plot_ancestral.R b/tests/testthat/test_plot_ancestral.R new file mode 100644 index 00000000..3b79e5b2 --- /dev/null +++ b/tests/testthat/test_plot_ancestral.R @@ -0,0 +1,22 @@ +tree <- read.tree(text = "((t1:1,t2:1):1,(t3:1,t4:1):1);") +tree2 <- read.tree(text = "(((t1:1,t2:1):1,t3:2):1,t4:3);") +dat <- matrix(c("a", "a", + "a", "t", + "t", "a", + "t", "t"), byrow = TRUE, nrow = 4L, + dimnames = list(c("t1", "t2", "t3", "t4"), NULL)) +dna <- phyDat(dat) +fit <- pml(tree, dna) +test_ml <- ancestral.pml(fit, type = "ml") + + +test_that("Pie_plots works", { + pie_ancestral <- function() plotAnc(test_ml) + vdiffr::expect_doppelganger("Pie plots", pie_ancestral) +}) + + +test_that("plotSeqLogo works", { + seq_logo <- function() plotSeqLogo(test_ml) + vdiffr::expect_doppelganger("SeqLogo", seq_logo) +}) diff --git a/tests/testthat/test_plot_networx.R b/tests/testthat/test_plot_networx.R new file mode 100644 index 00000000..14d68a09 --- /dev/null +++ b/tests/testthat/test_plot_networx.R @@ -0,0 +1,6 @@ +net <- as.networx(allCircularSplits(5)) + +test_that("plot.networx works", { + networx_plot <- function() plot(net) + vdiffr::expect_doppelganger("plot.networx", networx_plot) +}) diff --git a/tests/testthat/test_plot_pml.R b/tests/testthat/test_plot_pml.R new file mode 100644 index 00000000..e8182802 --- /dev/null +++ b/tests/testthat/test_plot_pml.R @@ -0,0 +1,13 @@ +tree <- read.tree(text = "((t1:1,t2:1):1,(t3:1,t4:1):1);") +dat <- matrix(c("a", "a", + "a", "t", + "t", "a", + "t", "t"), byrow = TRUE, nrow = 4L, + dimnames = list(c("t1", "t2", "t3", "t4"), NULL)) +dna <- phyDat(dat) +fit <- pml(tree, dna) + +test_that("plot.pml works", { + pml_plot <- function() plot(fit) + vdiffr::expect_doppelganger("plot.pml", pml_plot) +}) From 7089787b7d66937c19359c9d512ebcd252b78111 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 17 May 2024 17:57:49 +0200 Subject: [PATCH 186/216] bug fix --- R/ancestral.R | 2 +- R/plotAnc.R | 3 ++- man/plot.ancestral.Rd | 2 +- man/write.ancestral.Rd | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/ancestral.R b/R/ancestral.R index 74897258..7057a8a7 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -188,7 +188,7 @@ ancestral.pml <- function(object, type = "marginal", ...) { #' @param file a file name. File endings are added. #' @param ... Further arguments passed to or from other methods. #' @returns \code{write.ancestral} returns the input x invisibly. -#' @seealso \code{\link{ancestral.pml}}, \code{\link{plot.ancestral}} +#' @seealso \code{\link{ancestral.pml}}, \code{\link{plotAnc}} #' @examples #' data(Laurasiatherian) #' fit <- pml_bb(Laurasiatherian[,1:100], "JC", rearrangement = "none") diff --git a/R/plotAnc.R b/R/plotAnc.R index b725bcd1..7c308d53 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -33,7 +33,8 @@ getAncDF <- function(x){ ## ancestral. #' @param x an object of class \code{ancestral}. ## @param site.pattern logical, plot i-th site pattern or i-th site -#' @param i,site plots the i-th site. +#' @param i plots the i-th site. +## ,site ## @param which either "pie" or "seqlogo" #' @param node to plot for which the probabilities should be plotted. #' @param start start position to plot. diff --git a/man/plot.ancestral.Rd b/man/plot.ancestral.Rd index 858afec0..0d553f6b 100644 --- a/man/plot.ancestral.Rd +++ b/man/plot.ancestral.Rd @@ -14,7 +14,7 @@ plotSeqLogo(x, node = getRoot(x$tree), start = 1, end = 10, \arguments{ \item{x}{an object of class \code{ancestral}.} -\item{i, site}{plots the i-th site.} +\item{i}{plots the i-th site.} \item{col}{a vector containing the colors for all possible states.} diff --git a/man/write.ancestral.Rd b/man/write.ancestral.Rd index 767e55aa..a1db929a 100644 --- a/man/write.ancestral.Rd +++ b/man/write.ancestral.Rd @@ -53,5 +53,5 @@ plotAnc(anc_ml_disc, 20) unlink(c("ancestral_align.fasta", "ancestral_tree.nwk", "ancestral.state")) } \seealso{ -\code{\link{ancestral.pml}}, \code{\link{plot.ancestral}} +\code{\link{ancestral.pml}}, \code{\link{plotAnc}} } From 7166ab5218bb1010ec22f53887fb52229f6aa4d8 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 17 May 2024 18:49:52 +0200 Subject: [PATCH 187/216] bug fix --- NAMESPACE | 2 +- R/splits.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8dc8ef23..d11b6ed4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,7 +43,6 @@ S3method(c,splits) S3method(cbind,phyDat) S3method(cophenetic,networx) S3method(cophenetic,splits) -S3method(distinct,splits) S3method(glance,phyDat) S3method(glance,pml) S3method(glance,pmlMix) @@ -149,6 +148,7 @@ export(dist.logDet) export(dist.ml) export(dist.p) export(distanceHadamard) +export(distinct.splits) export(diversity) export(dna2aa) export(dna2codon) diff --git a/R/splits.R b/R/splits.R index 27b83985..88d7abc9 100644 --- a/R/splits.R +++ b/R/splits.R @@ -185,7 +185,7 @@ unique.splits <- function(x, incomparables = FALSE, unrooted = TRUE, ...) { } -#' @export +#' @export distinct.splits distinct.splits <- function(...) { tmp <- c(...) res <- unique(tmp) From 542c0f56d41c928651617cddb268ec49b032a9f1 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Sun, 19 May 2024 19:17:14 +0200 Subject: [PATCH 188/216] update code coverage --- .Rbuildignore | 1 + .github/workflows/test-coverage.yaml | 23 +++++++++++++++++------ README.md | 2 +- codecov.yml | 14 ++++++++++++++ 4 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 codecov.yml diff --git a/.Rbuildignore b/.Rbuildignore index 418bb97e..07bb9909 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -17,3 +17,4 @@ ^vignettes/AdvancedFeatures_cache$ ^vignettes/logo.png ^codemeta\.json$ +^codecov\.yml$ diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 2c5bb502..fefc52e2 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -8,6 +8,8 @@ on: name: test-coverage +permissions: read-all + jobs: test-coverage: runs-on: ubuntu-latest @@ -15,7 +17,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: @@ -23,28 +25,37 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::covr + extra-packages: any::covr, any::xml2 needs: coverage - name: Test coverage run: | - covr::codecov( + cov <- covr::package_coverage( quiet = FALSE, clean = FALSE, - install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") ) + covr::to_cobertura(cov) shell: Rscript {0} + - uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + - name: Show testthat output if: always() run: | ## -------------------------------------------------------------------- - find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true shell: bash - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package diff --git a/README.md b/README.md index 5f3c32f5..5e2c8fa4 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![CRAN Status Badge](https://www.r-pkg.org/badges/version/phangorn)](https://cran.r-project.org/package=phangorn) [![CRAN Downloads (monthly)](https://cranlogs.r-pkg.org/badges/phangorn)](https://cran.r-project.org/package=phangorn) [![CRAN Downloads (total)](https://cranlogs.r-pkg.org/badges/grand-total/phangorn)](https://cran.r-project.org/package=phangorn) -[![codecov.io](https://codecov.io/github/KlausVigo/phangorn/coverage.svg?branch=master)](https://app.codecov.io/github/KlausVigo/phangorn?branch=master) +[![Codecov test coverage](https://codecov.io/gh/KlausVigo/phangorn/branch/master/graph/badge.svg)](https://app.codecov.io/gh/KlausVigo/phangorn?branch=master) # phangorn diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000..04c55859 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,14 @@ +comment: false + +coverage: + status: + project: + default: + target: auto + threshold: 1% + informational: true + patch: + default: + target: auto + threshold: 1% + informational: true From 7dd4383c3732e8d37805388cacd69fad6fccedd2 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Sun, 19 May 2024 20:32:21 +0200 Subject: [PATCH 189/216] add test --- tests/testthat/test_plot_pml.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/testthat/test_plot_pml.R b/tests/testthat/test_plot_pml.R index e8182802..e25e7c86 100644 --- a/tests/testthat/test_plot_pml.R +++ b/tests/testthat/test_plot_pml.R @@ -1,4 +1,5 @@ tree <- read.tree(text = "((t1:1,t2:1):1,(t3:1,t4:1):1);") +tree2 <- read.tree(text = "((t1:1.1,t2:1.1):1,(t3:1,t4:1):1.1);") dat <- matrix(c("a", "a", "a", "t", "t", "a", @@ -11,3 +12,12 @@ test_that("plot.pml works", { pml_plot <- function() plot(fit) vdiffr::expect_doppelganger("plot.pml", pml_plot) }) + + +trees <- c(tree, tree2) |> .compressTipLabel() + +test_that("densiTree works", { + densi_plot <- function() densiTree(trees, type="phylogram", width=2, + jitter=list(amount=.1, random=FALSE), alpha=1) + vdiffr::expect_doppelganger("densiTree", densi_plot) +}) From 342fceec4f7cf3e26be2db73738675ce3e0d3fc6 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 20 May 2024 12:17:45 +0200 Subject: [PATCH 190/216] remove warning --- R/plotAnc.R | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/R/plotAnc.R b/R/plotAnc.R index 7c308d53..b3518035 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -141,6 +141,26 @@ plotAnc <- function(x, i = 1, col = NULL, if (!is.null(pos)) legend(pos, legend=levels, pch=21, pt.bg = col) } + +my_ggseqlogo <-function (data, facet = "wrap", scales = "free_x", ncol = NULL, + nrow = NULL, ...) +{ + x <- geom_logo(data = data, ...) + x[[2]] <- scale_x_continuous(limits = c(start-0.5, end+.5) , + breaks=pretty(seq(start, end))) + p <- ggplot() + geom_logo(data = data, ...) + theme_logo() + if (!"list" %in% class(data)) return(p) + facet <- match.arg(facet, c("grid", "wrap")) + if (facet == "grid") { + p <- p + facet_grid(~seq_group, scales = scales) + } + else if (facet == "wrap") { + p <- p + facet_wrap(~seq_group, scales = scales, nrow = nrow, ncol = ncol) + } + return(p) +} + + #' @rdname plot.ancestral #' @importFrom ggplot2 scale_x_continuous #' @export @@ -176,9 +196,7 @@ plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT } else SC <- make_col_scheme(chars=lev, cols= hcl.colors(length(lev))) - ggseqlogo(X, col_scheme=SC, method='p') + - scale_x_continuous(limits = c(start-0.5, end+.5) , - breaks=pretty(seq(start, end))) + my_ggseqlogo(X, col_scheme=SC, method='p') } From 656d1dc6f923bc854702652bccdf1d5ccf7db528 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 20 May 2024 12:26:33 +0200 Subject: [PATCH 191/216] add write.pml --- NAMESPACE | 1 + R/pml_generics.R | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index d11b6ed4..c5937427 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -246,6 +246,7 @@ export(write.nexus.dist) export(write.nexus.networx) export(write.nexus.splits) export(write.phyDat) +export(write.pml) export(write.splits) export(writeDist) import(Rcpp) diff --git a/R/pml_generics.R b/R/pml_generics.R index 28cf68fd..c607c01e 100644 --- a/R/pml_generics.R +++ b/R/pml_generics.R @@ -122,3 +122,14 @@ print.pml <- function(x, ...) { print(bf) #cat(bf, "\n") } } + + +#' @export +write.pml <- function(x, file=tempfile()){ + write.tree(x$tree, file=paste0(file, "_tree.nwk")) + if(!is.null(x$bs)) write.tree(x$bs, file=paste0(file, "_bs.nwk")) + sink(paste0(file, "_pml.txt")) + print.pml(x) + sink() + invisible(x) +} From 9bbeed92db7f39fa16039f8f570f695aed418a79 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 20 May 2024 15:13:11 +0200 Subject: [PATCH 192/216] add imports --- NAMESPACE | 5 +++++ R/plotAnc.R | 5 ++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c5937427..e84efdd2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -261,9 +261,14 @@ importFrom(Matrix,sparseMatrix) importFrom(fastmatch,fmatch) importFrom(generics,glance) importFrom(generics,tidy) +importFrom(ggplot2,facet_grid) +importFrom(ggplot2,facet_wrap) +importFrom(ggplot2,ggplot) importFrom(ggplot2,scale_x_continuous) +importFrom(ggseqlogo,geom_logo) importFrom(ggseqlogo,ggseqlogo) importFrom(ggseqlogo,make_col_scheme) +importFrom(ggseqlogo,theme_logo) importFrom(grDevices,adjustcolor) importFrom(grDevices,col2rgb) importFrom(grDevices,hcl.colors) diff --git a/R/plotAnc.R b/R/plotAnc.R index b3518035..4a45ad86 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -162,7 +162,8 @@ my_ggseqlogo <-function (data, facet = "wrap", scales = "free_x", ncol = NULL, #' @rdname plot.ancestral -#' @importFrom ggplot2 scale_x_continuous +#' @importFrom ggplot2 scale_x_continuous ggplot facet_grid facet_wrap +#' @importFrom ggseqlogo geom_logo theme_logo #' @export plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT", ...){ stopifnot(inherits(x, "ancestral")) @@ -200,8 +201,6 @@ plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT } -# p = ggplot() + geom_logo(data = data, ...) + theme_logo() - ##' @rdname plot.ancestral ##' @export #image.ancestral <- function(x, ...){ From e4e1622ba663e7b4db828c4d6b77eb4c89338d61 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 20 May 2024 15:40:36 +0200 Subject: [PATCH 193/216] bug fix --- R/plotAnc.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/plotAnc.R b/R/plotAnc.R index 4a45ad86..370d2bde 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -143,12 +143,12 @@ plotAnc <- function(x, i = 1, col = NULL, my_ggseqlogo <-function (data, facet = "wrap", scales = "free_x", ncol = NULL, - nrow = NULL, ...) + nrow = NULL, start=NULL, end=NULL, ...) { x <- geom_logo(data = data, ...) x[[2]] <- scale_x_continuous(limits = c(start-0.5, end+.5) , breaks=pretty(seq(start, end))) - p <- ggplot() + geom_logo(data = data, ...) + theme_logo() + p <- ggplot() + x + theme_logo() if (!"list" %in% class(data)) return(p) facet <- match.arg(facet, c("grid", "wrap")) if (facet == "grid") { @@ -197,7 +197,7 @@ plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT } else SC <- make_col_scheme(chars=lev, cols= hcl.colors(length(lev))) - my_ggseqlogo(X, col_scheme=SC, method='p') + my_ggseqlogo(X, col_scheme=SC, method='p', start=start, end=end) } From 3be40de8c30cdee0aea925dca9a2bc663f36d2de Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 20 May 2024 16:03:26 +0200 Subject: [PATCH 194/216] add man pages --- R/pml_generics.R | 21 ++++++++++++++++++--- man/pml.Rd | 9 +++++++++ man/write.pml.Rd | 30 ++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 man/write.pml.Rd diff --git a/R/pml_generics.R b/R/pml_generics.R index c607c01e..1d5d92ac 100644 --- a/R/pml_generics.R +++ b/R/pml_generics.R @@ -1,3 +1,4 @@ +#' @rdname pml #' @export logLik.pml <- function(object, ...) { res <- object$logLik @@ -28,7 +29,7 @@ BIC.pml <- function(object, ...) { res } - +#' @rdname pml #' @export anova.pml <- function(object, ...) { X <- c(list(object), list(...)) @@ -60,7 +61,7 @@ vcov.pml <- function(object, ...) { res } - +#' @rdname pml #' @export print.pml <- function(x, ...) { model <- guess_model(x) @@ -124,8 +125,22 @@ print.pml <- function(x, ...) { } +#' Export pml objects +#' +#' \code{write.pml} writes out the ML tree and the model parameters. +#' +#' @param x an object of class ancestral. +#' @param file a file name. File endings are added. +#' @param ... Further arguments passed to or from other methods. +#' @returns \code{write.pml} returns the input x invisibly. +#' @seealso \code{\link{ancestral.pml}}, \code{\link{plotAnc}} +#' @examples +#' data(woodmouse) +#' fit <- pml_bb(woodmouse, "JC", rearrangement = "none") +#' write.pml(fit, "woodmouse") +#' unlink(c("woodmouse_pml.txt", "woodmouse_tree.nwk")) #' @export -write.pml <- function(x, file=tempfile()){ +write.pml <- function(x, file=tempfile(), ...){ write.tree(x$tree, file=paste0(file, "_tree.nwk")) if(!is.null(x$bs)) write.tree(x$bs, file=paste0(file, "_bs.nwk")) sink(paste0(file, "_pml.txt")) diff --git a/man/pml.Rd b/man/pml.Rd index 8f32357e..9d79876b 100644 --- a/man/pml.Rd +++ b/man/pml.Rd @@ -4,7 +4,10 @@ \alias{as.pml} \alias{pml} \alias{optim.pml} +\alias{logLik.pml} +\alias{anova.pml} \alias{vcov.pml} +\alias{print.pml} \title{Likelihood of a tree.} \usage{ as.pml(x, ...) @@ -18,7 +21,13 @@ optim.pml(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, rearrangement = ifelse(optNni, "NNI", "none"), subs = NULL, ratchet.par = ratchet.control(), ...) +\method{logLik}{pml}(object, ...) + +\method{anova}{pml}(object, ...) + \method{vcov}{pml}(object, ...) + +\method{print}{pml}(x, ...) } \arguments{ \item{x}{So far only an object of class \code{modelTest}.} diff --git a/man/write.pml.Rd b/man/write.pml.Rd new file mode 100644 index 00000000..a7c1a285 --- /dev/null +++ b/man/write.pml.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pml_generics.R +\name{write.pml} +\alias{write.pml} +\title{Export pml objects} +\usage{ +write.pml(x, file = tempfile(), ...) +} +\arguments{ +\item{x}{an object of class ancestral.} + +\item{file}{a file name. File endings are added.} + +\item{...}{Further arguments passed to or from other methods.} +} +\value{ +\code{write.pml} returns the input x invisibly. +} +\description{ +\code{write.pml} writes out the ML tree and the model parameters. +} +\examples{ +data(woodmouse) +fit <- pml_bb(woodmouse, "JC", rearrangement = "none") +write.pml(fit, "woodmouse") +unlink(c("woodmouse_pml.txt", "woodmouse_tree.nwk")) +} +\seealso{ +\code{\link{ancestral.pml}}, \code{\link{plotAnc}} +} From 2231d74c0e12e69f3beca8c520036a451146fcdb Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 21 May 2024 09:50:52 +0200 Subject: [PATCH 195/216] no Remotes --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c5c56062..b364e172 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -70,7 +70,6 @@ Suggests: xtable LinkingTo: Rcpp -Remotes: github::EmmanuelParadis/ape VignetteBuilder: knitr, utils From ec885414f235f6598744d2c667ec9395916a9ddb Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 21 May 2024 13:11:42 +0200 Subject: [PATCH 196/216] fix plot --- vignettes/Ancestral.Rmd | 4 ---- vignettes/seqLogo.png | Bin 9198 -> 0 bytes 2 files changed, 4 deletions(-) delete mode 100644 vignettes/seqLogo.png diff --git a/vignettes/Ancestral.Rmd b/vignettes/Ancestral.Rmd index d1a65387..caafacb8 100644 --- a/vignettes/Ancestral.Rmd +++ b/vignettes/Ancestral.Rmd @@ -54,10 +54,6 @@ The `plotSeqLogo` function is a wrapper around the from the _ggseqlogo_ function #seqLogo( t(subset(anc.mpr, getRoot(tree), 1:20)[[1]]), ic.scale=FALSE) plotSeqLogo(anc.mpr, node=getRoot(tree), 1, 20) ``` -![](seqLogo.png) - - - ```{r MPR, fig.cap="Fig 2. Ancestral reconstruction using MPR."} plotAnc(anc.mpr, 17) title("MPR") diff --git a/vignettes/seqLogo.png b/vignettes/seqLogo.png deleted file mode 100644 index ac48532d2cc2ec5a06630d6a09f3f04dc5edf3cc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9198 zcmc(FcTiK!`|n9;0#c*|L5eMuZ~#F-K>-Z?YpoZV;V`Rwjf_St7Q$;Qf5 zfKQ4K0005A)5dlHfQA47m9~R}U1Ku4P{OX5w6Sn7VHW{loBg0rC^Q<)!NI}F$;rjV z#lyq1W5*6&US2*vK7M}wojZ352nYxY3JM7c2@4DF+OXfOe>FLv_&CJZq z&CM+=EG#W8t*or9t*vcrY;0|9?dFAD@trkeHZw?b@~L*RLlfCEd7jgF>MsCnw*$c{3#?B{elQEiElQJv}2MC@8E(r3?}AqY}dR#skK{`~p#ii(QL%F3#$s_N?Mnwpy0+S>a1`i6#v z#>U1MFJ3e?H8B_rCX@N{<;z#EUNtv2x3sjpe*OB*n>Vekt!-^>?d|O>7OSJ9qqDQK ztE;QKySt~S=k43Ky}iBf-o5MV>+A3D9~c-I92|WA{{4p!ABKj8hKGkoMn*dA$e*F09)2E4viO-)ufBEuda&mHNYU=CPuiw6Xo1UJYnVFfLot>MTo1dRwSXlV} z{ris}KNc4kmzI`({`~pt*RSQ}<&~9{)z#ItwYA^BfB*UOXMKHrV`F1;b8~BJYn!dv z1;F<9Hh>$q0D!%KnX#e6rQG?v$On#&d2d#AZggcf8y^xrrsQ>SBLLMc6}NP=}y_r$z^jp0%a4+2OAo4A`kYWAsb;7 zsCXT~h#3R?eEZ2l~eR*{a$8IC`)biNPA<}rJ~x}=qRJdn0M07;T3 zHH5#nXmGP~XGKnA!6-tB&JaJ`R+~SIqFT}gAYR^cysv`=P*@brC}+u-51>Z)DUo4K0_p8|>yG7S`IXU`8dHBP4Bs7Mh}`Cf#Ba?C zQXT2yP$;_p4*Tt2%P2XRlS4kv$Ot}}fW3)=+cfe-r9i*O+GSGJ)eH?Q6dcsP2dr*s zl~6g4(1W-0r>G_t&@WFA1>Sv0XZBKUaL9&f zjwHm13Z-Yy?wZ{}%?6NzcbYyZKlVLrM=nsRZie6P_)R7zoVgG;d6bLXLIYr5iJvG+ zm!sGA1SU-u+#eTc88q+cf=ElS1VMEPxRw^dC`l{L?)eJ0@#a90nY+!vqRWDm zm;TJ&EE`p%!8At(!l5vhYt41>*nEA#l_49{jd8*k1$n&r3~CfL=AEVt*jhRfEx{m1p3o zZi;-9_h;nq(h!^CCbaA57LnmHRdOb>`<6rq2PGYD3p_>zSanE=DCEG4Md3$!YtNA> zB;D9P&r`_UU4)BunQsLy$a$1tE|3^zkuf;mjHaiAM|ofDm_BoN_*qooNNeqE zpH~C3{Mg`~c#{iDy9D76PkWLInB~eHykL2vXI$9gy}_}?6d|o@>%1TDCJzgZ7D|8S zpiat5>|Gvv1M65ADh=8gJ!D7?ZAT_Q9a=7ay>cR8J?UG_D_2BNbTVvIV7<$h6{gmFz;?XK;1n& z4ojK?(31NRM#hdFt%tSz48BMAP!!*8kMnRbC8*2=0sr!CkTb48sg`#BKEQ63u2FRi z_BB_Og#M)aak!4$kelVG^I%f{($`^>k>Y2TWnbHn>X7h0)$+K!(h*ku0j#dVHCjP11ah{_YKq#=oI|H z!qDf^SF0qj4hc7EifeIl2auf<^}~>r<-xej-5-Ggt7X|G4FIo;Q-Q1~YzN@UWDeEZ z%Pa3g!OO>>Ewt@e{qzLA5Z!eYgWX^sKU~@Gh(JEbmll8^g`)StSj;JM zWylkA!evIu3INSe_T}?F%8sZ5`nMcRu|Wr(1c(6d_IKIF_kPBt4E8)Ibb%IhT={kK=q6ciY6!X4iH zwl+GpRcSis-fs&1o*##&cJ0~B2$mxgC1-^Qj`zGS#8qyUdxh)$ z$X(CsmRvr?`lL3>l2?Q0CUp#)7N*eDpDa;)qEB@qJU!4YO80$h_*L`Xw0ly_yYe;s zs=lP2#kiUz-D*-#kYFUrTSnxX9pQ1=b5cmKvWiVtO%2ouZvGY7zn-}ZeDmY z!ZrXBPLD-O&KMj)G`E#2Y;74OI8{c5Xq^ysA@(X}h8wc`4p7!?ghinl|8s*j z^|mJ49uJk`ww*5G1*yN(PfatHu&k+|Z%+5K^?&j=g`{vpW|qhtbHBbE4S`21XCFVy zJ$CtH+?N9;SaQquup5%w?ct`!OPJW^3*jQo6d&6?yx9Gw=G2*ZqW+*ZZ+vkCi-Wo* z`S>aq$@sHn)auQPH~(d6124+GadSn+q60Q(!@dXqk6Hj$k`Pu@gw9k4Hb!of?u}lY z|3-6rgd*69g3?d}$goq8ADMaBeBY~5t6gho7dUa~ejHS@DL^$R>|F@^h)oB88?iM6}CMc(xYG9Uh+97ozXL4Ad z*{T&(KGE~$FQ$=bkiRyZF63<1l-MOs|HS&BM0u~G@E-)|0#ed0eRpOT@_LNfwJjLw z`Ir={50{-Ab+bAb; zD0i%vB?`zPLwf+)5hmYS4E;+;fN0Mv*b#T!Su!3wQv3iWZpY9|HfyyHp0+K=%aL3` zUOLWKOc+$JD*BgJN-lV#atvucGr}x!;N&dlrf?AKPn7;kljWqj_#g+-IKR?D@&J7*{`c)-i}C_>t*7mD7M!G)%^-(fOm8sugT2frD`7br0HfZy-m z;Y8E+x4Ig|{2fQ;&+a-;(o#?cz@AnyfQX)*4myy!{;?>i%V6I;A7u0$<24hD7?%BRf zvB?$~wU&2TiB);Xk5AgkglG2Gt60{1uUq=lQUTofk@hc4&P_Ik=O3pFF0P{A z7qb?KbIWG7YGk53V+TPi645=%d%ad{;4>^x(d_)PAX4(K!PAeh+1(T&m7Q=c>wC!9 zZ~Ur8_+Yly0vzNS1M{M+0x@tw%F~OZ1pt1=Nq+zhvUGHDyXZ332EXb&m)Hog(yPzC zQPdFsDfq3Hm^fX){*g}y-v_oYW9di9e`e|)dlC9pIrb zSoRM;-#W3#2-c@%F2rZXq@M?l4QO)Ap;Y+spnPhaz2_>v_=x~t5br6%upE#_9EEQrYZ;B^6t#(U(5e$ ze1J^!%b$bu{yIK`ct=MWvI#C#nQjL?mMxy?eN(3#m~#K@2K52~iq)aI6dVZM zJZn>`%3flJT5!St6!R``RC*T6&oMMGR5W-cC6r|R;A%S`!6ao~7ScQkuw06ny|tSF zX0i@L)qxz{yk&7R&_NUdF6&`0#pQ=$5nQ1Tnm)(OfAd{{Dr7`u$9ixHfO$e^WLO?S z*W7U45B1F!CNuW)KcA-GGLoDGSW7K*RTONS7iPC3A?IdSm^j1l*;o_&_9_Zwf-+Fu zhoc+Y>&6Y{^j@5N6C?SY+hXG`M?3}QiHsQFwu56i|#hgp(#pi_>% za+|`B(KGeSi{Yef&1FNsNqYnJzym#KQg}58n5>uHM3o8IrL}qn^mg z3P@?-MdITeJNd!YH}f|xT_g#?m6=M=QhVT6H4en#h!|V50U8Z%d(tlg>Qh`h>UfvW zTDbQXBIl5aH)^k>?_`{)FGWLUlrq)$Ny6^mJ^PPKpy>+(G57s)s1?AmE5i}t24+qfzRS>lIm;5^wTu=jz z1HYFg#*bXe+PMRTiEv*NFX=7W5ZnarYC(%QZCKK4U4UNFQk$~MCqT6`8i1#p8k6Cm zbH$B6XaLw&$%#+B%ldt*e1l;(avL`5Z{;#`MbWGH(3o^0nqt+80_B6bkf9kgH1ob5 zO~BeCx}_H6Y$3!2Cl9&qjwYz&oaV+K9|-2-I z#$d-*`6#BH8yVZS2mE>BWqiSS#$6j$CkN;&0aqHQ+?Kum;}t;M6;@_P##RX+p3jX^ zG>;*DhO-hOT$I3gvr+?y*FH@uGb54nigr#?Egy_kp$b6ptO~eRjJg9;Z7nah3uH%a z7di|=o8PV$y~$?Stpb?X=i6d6rz~OKcf#jysX=U;GpPF>Mzb3ItUj5on&4P@k=Eyz zH$TVBcr-LDkKBIQ&VgZ@_s#kdm&1bk9y+bE;2=+W8eCke08K#(h|iz|xbFcBv%T5o z@~*pn9#8HUOMgLOHXB_&bu=(09{avj!NBPvs-tHHbKIWU3oh}{j8ZC|JD*2HUNk1& z5cOOZ741}w$GX;4zi1k4cz9`@BHjOZ|X+CO|{WuCqumP$gjK3aRs?6_fRaGc$@Ow!yJG6MI@c-ilpi) zPTD_=c_$8f25>;_#i0PDFlk6Lp6HaV0OfU}|1ymD1}uj~p45jWWp(_yd$o{nEhCKm zZRR=g3edT;zx*{tLo&a1m({T2&ZJUz*LRQF)&B}Iog1y}t1g822a6*|$IB1BI!WOM zJyT}U05e|*;N@nyF_ZFS?5MjZGV;QyfZPMOtqP&3XV_^K zLQcq2{%<-6gw=#RXR!dSgqEA@gJPDHCo?A6M54$6!=757bCHE3_vV!IH=ige)u^TOAIglKyN?@d_{}+E$jLydIzCB-6ge+ z11uN{Ofabd@EHdnFlg^4nz{w`_EUQn)KTCS?M-$dwW7~OQR3O1(kThbl>^k34W;Fv z$f4)vM;>zE>jP}FWgtywi^f_5M*gaO+@!AjaEk_t5*)w?z87C^8;p!}Rfy^6CY+zF6f9K{%(qh zG&u?IHu4a76na;=X}EUj6V?{m>pQ?LU_I`Aot^*0Z+>3tTWef0j_u3WO*ru?sl)wm zdM(fKQtKWgYH=p%CgI$OE3=m#SiM_-8s{|Zx(5;Af5cw0Mq*@Pby`cSldmr$pV}@2 zhG`cj)$V|kC`xR*e+|he5cLW6*{((#w}cNvIb?lq<+G2QWptDFG0R;$u!J8`jQw~3 z>U|Mq*Bl8Y6qJ4zq87VuI`?a*%Rq%n4%ntX*!HVSgDH$8Q;1w_Es2}d=Q;p6pl^u+ zROyfEg~!>ry`wPLAq6h)!@qMjNXYbj>>P)!AA|3BU45KpaVB_rV*&gNO3Mj8 zPRyt=0ZvyA{>O)dP7MZF4bXYmwp0=fA+xiwA#-ksCTH`EJUgkFfxlVD z?g0b5H-)HIo_vYxDbp9jV||z!q2j_|=Z%1qm)pL}ey=+zRo!2^{W?Bid(Qq6vcO;7Owk3H@HZ$3LHw&ryyw>nFNCO#rCQ zW~rBb#dp(_X<;_pY_G65$%W8wV+osFm{>^<_h=O1S#c_3LhZBi4mix?3;v}m&P?bp z$q=Be&nO_bS!=J+(5O>yw_)9rUvOL2;^vj8>9GWR)03raax-g9V`ZX_+R7?5!rERz z5?O0THulp@HbZ_x72=N&`kKZI;XYeTQ?IYvYl(PWzO{OzqK^MfreDz%aX)@e`{DF8 zyM=mr`-SFJyM^J@f)U=^X7z^!3r8LaJ{o!T#BO2VVLR%Nt!n#)$nBiq)Li4**q?#z zdOxC9uFBk*nVZ@%P?&1B(D>DULFie*NJy+%twKPd>mSSGkJrB_D6Ou1w_U(e6ju*7 z6^yK2s8&78;=R4z|nLn+~Dqg9MDmuF4Ex8@7n;(KB}KZsz6D{Vwf#_csbdvTs)YP?q^W zJqO8`ik(YOGgiUcmtN0#U#A??T<&VtuGe^G{}mdTb8gz(`0@zr0#ev{Tpb+AFVK8YVFx#(h^`F+QkQ_w@Qb&$-{D zayEO?^r!R#@&3;$6{-d`3Kn!DzSL9P%Ie>X*MH7hsF27~t{M#fJKD;>qa9(mRIf>i z(>>(MidVw^+{s^Gr(D||dB{agE}FeP!9mwN$|f%p%M>16o5s1zR=WoOGZw7mH9Of` zIz-zF`?2oF(felPO+eCoRE1^odZ#xKW#{>}c?p>(i?xr0C^KExMZz z_GHPdb{#SK+2zCG6)Pn?9y923;&=9XjY?T#m+YXs|BX>Cx7zwb<%q$HnF~V{-g$Ph z@gG{16U}ZNwMD?mLPr*~E}T}EkXGNQFJJw1_zvGdVVt#Iz3)~a$@qWfl@g`Pe7kwh zQ5q{e!~|}Q|J?XfCo`2O>43ZhPfnSY#-V*Dz1_EuoD+>h00h zJMS%yOyS4>q`A&Dq+fFl{?e4>-iZ3`wa^JC&4&qoswdT$N91|^{ue8X{{AlrNtP`; zy4`e#85aksv-R@KaEsHCt&9n`31{N{hp{d_JxSXK7Yu~^x37O}e)srQm356>?JPU< zZY}z}OWrOb88e?V(vQ8X^<@`gHPLGQAGWH$fntuGRC!(X}DHrsdk zeW5bFvYu|u$u4yVdRx@Z5Ab?CkV-R{8Uv_o;W*Th#%-f`{>N`;tFT&ihMV?qB+IEf~C9W96_}e8@#| zoUw8|wah8JN$>lOpTdk_!e+~QyGMZ6TzlE_=;x^>?a$Aq+&7NSlEueuB60N1dId(j ze_zzi7EeU~5@vXj!hc?{-h!JJelkIc%;j7xFc7`$WPQfet!(3NWuDr001thQo6MzC zB!IcA9(UZehJEFw=I>pA=~Bm7gTUDvZmLn=re1XyB`n9;Z`o`=3FBbIOZTBjKP5k# z=B;UZIax^W-u|r;mLIcGwc5X(bOC^bZ$JJ2ad!VFwx4fOG;Nba`g?E3%*4vL)Ci9M EFSeiqxBvhE From b273a84cdcc4c9504f8b0e61f9815e1c210a88ff Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 21 May 2024 15:20:51 +0200 Subject: [PATCH 197/216] clean up --- R/ancestral.R | 4 +--- R/distSeq.R | 6 ++---- R/sankoff.R | 3 +-- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/R/ancestral.R b/R/ancestral.R index 7057a8a7..be45cb6e 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -364,9 +364,7 @@ pace <- ancestral.pars mpr.help <- function(tree, data, cost = NULL) { tree <- reorder(tree, "postorder") - if (!inherits(data, "phyDat")) { - stop("data must be of class phyDat") - } + if (!inherits(data, "phyDat")) stop("data must be of class phyDat") levels <- attr(data, "levels") l <- length(levels) if (is.null(cost)) { diff --git a/R/distSeq.R b/R/distSeq.R index f6249912..de1c0cdf 100644 --- a/R/distSeq.R +++ b/R/distSeq.R @@ -63,8 +63,7 @@ #' @export dist.hamming <- function(x, ratio = TRUE, exclude = "none"){ if(inherits(x, "DNAbin") | inherits(x, "AAbin")) x <- as.phyDat(x) - if (!inherits(x, "phyDat")) - stop("x must be of class phyDat") + if (!inherits(x, "phyDat")) stop("x must be of class phyDat") l <- length(x) contrast <- attr(x, "contrast") @@ -230,8 +229,7 @@ dist.ml <- function(x, model = "JC69", exclude = "none", bf = NULL, Q = NULL, dist.logDet <- function(x) { if(inherits(x, "DNAbin") | inherits(x, "AAbin")) x <- as.phyDat(x) - if (!inherits(x, "phyDat")) - stop("x must be of class phyDat") + if (!inherits(x, "phyDat")) stop("x must be of class phyDat") weight <- attr(x, "weight") contrast <- attr(x, "contrast") r <- attr(x, "nc") diff --git a/R/sankoff.R b/R/sankoff.R index ffb9e290..35031d23 100644 --- a/R/sankoff.R +++ b/R/sankoff.R @@ -46,8 +46,7 @@ fit.sankoff <- function(tree, data, cost, #' @rdname parsimony #' @export sankoff <- function(tree, data, cost = NULL, site = "pscore") { - if (!inherits(data, "phyDat")) - stop("data must be of class phyDat") + if (!inherits(data, "phyDat")) stop("data must be of class phyDat") data <- prepareDataSankoff(data) if (is.null(cost)) { levels <- attr(data, "levels") From bfc2650e5c1e412493049bd2d9fb65eaa2bb0a84 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 22 May 2024 18:39:25 +0200 Subject: [PATCH 198/216] avoid empty cycle attribute --- R/read.nexus.splits.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/read.nexus.splits.R b/R/read.nexus.splits.R index f5e1a4b0..214b3756 100644 --- a/R/read.nexus.splits.R +++ b/R/read.nexus.splits.R @@ -64,6 +64,8 @@ read.nexus.splits <- function(file) { spEnd <- spEnd[spEnd > sp][1] dims <- grep("DIMENSION", X, ignore.case = TRUE) cyc <- grep("CYCLE", X, ignore.case = TRUE) + fcyc <- FALSE + if(length(format) > 0) fcyc <- TRUE matr <- grep("MATRIX", X, ignore.case = TRUE) format <- grep("FORMAT", X, ignore.case = TRUE) start <- matr[matr > sp][1] + 1 @@ -119,7 +121,7 @@ read.nexus.splits <- function(file) { if (fint) attr(res, "intervals") <- intervals if (fcon) attr(res, "confidences") <- confidences if (flab) attr(res, "splitlabels") <- labels - attr(res, "cycle") <- cyc + if (fcyc) attr(res, "cycle") <- cyc class(res) <- "splits" res } From 7d484b333887035a6514d23bd1964d8f7906f404 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 23 May 2024 10:21:52 +0200 Subject: [PATCH 199/216] small improvements to plot functions --- R/networx.R | 32 ++++++++++++++++++++------------ R/read.nexus.splits.R | 2 +- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/R/networx.R b/R/networx.R index 0747a5d5..ff2c874b 100644 --- a/R/networx.R +++ b/R/networx.R @@ -777,9 +777,6 @@ plot.networx <- function(x, type = "equal angle", use.edge.length = TRUE, } x$.plot <- list(vertices = coord, edge.color = edge.color, edge.width = edge.width, edge.lty = edge.lty) - L <- list(Ntip = nTips, type = "networx") - assign("last_plot.phylo", c(L, list(edge = x$edge, xx = coord[, 1], - yy = coord[, 2])), envir = .PlotPhyloEnv) invisible(x) } @@ -846,22 +843,30 @@ plot2D <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, cex.node.label = cex, cex.edge.label = cex, col.node.label = tip.color, col.edge.label = tip.color, font.node.label = font, font.edge.label = font, - add = FALSE, direction="horizontal", ...) { + add = FALSE, direction="horizontal", xlim=NULL, ylim=NULL, + ...) { edge <- net$edge label <- net$tip.label xx <- coords[, 1] yy <- coords[, 2] nTips <- length(label) - - xlim <- range(xx) - ylim <- range(yy) - direction <- match.arg(direction, c("horizontal", "axial")) - if (show.tip.label) { + if(is.null(xlim)){ + xlim <- range(xx) offset <- max(nchar(label)) * 0.018 * cex * diff(xlim) - xlim <- c(xlim[1] - offset, xlim[2] + offset) - ylim <- c(ylim[1] - 0.03 * cex * diff(ylim), ylim[2] + - 0.03 * cex * diff(ylim)) + if (show.tip.label) xlim <- c(xlim[1] - offset, xlim[2] + offset) } + if(is.null(ylim)){ + ylim <- range(yy) + if (show.tip.label) ylim <- c(ylim[1] - 0.03 * cex * diff(ylim), + ylim[2] + 0.03 * cex * diff(ylim)) + } + direction <- match.arg(direction, c("horizontal", "axial")) +# if (show.tip.label) { +# offset <- max(nchar(label)) * 0.018 * cex * diff(xlim) +# xlim <- c(xlim[1] - offset, xlim[2] + offset) +# ylim <- c(ylim[1] - 0.03 * cex * diff(ylim), ylim[2] + +# 0.03 * cex * diff(ylim)) +# } if (!add) { plot.new() plot.window(xlim, ylim, asp = 1) @@ -929,6 +934,9 @@ plot2D <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, text(xx, yy, labels = node.label, col = col.node.label, cex = cex.node.label, font = font.node.label) } + PP <- list(Ntip = nTips, type = "networx", edge = net$edge, xx = coords[, 1], + yy = coords[, 2], x.lim=xlim, y.lim=ylim) + assign("last_plot.phylo", PP, envir = .PlotPhyloEnv) } diff --git a/R/read.nexus.splits.R b/R/read.nexus.splits.R index 214b3756..9f0e9f57 100644 --- a/R/read.nexus.splits.R +++ b/R/read.nexus.splits.R @@ -65,7 +65,6 @@ read.nexus.splits <- function(file) { dims <- grep("DIMENSION", X, ignore.case = TRUE) cyc <- grep("CYCLE", X, ignore.case = TRUE) fcyc <- FALSE - if(length(format) > 0) fcyc <- TRUE matr <- grep("MATRIX", X, ignore.case = TRUE) format <- grep("FORMAT", X, ignore.case = TRUE) start <- matr[matr > sp][1] + 1 @@ -115,6 +114,7 @@ read.nexus.splits <- function(file) { tmp <- gsub("CYCLE", "", tmp, ignore.case = TRUE) tmp <- sub("\\s+", "", tmp) cyc <- as.integer(na.omit(as.numeric(strsplit(tmp, " ")[[1]]))) + fcyc <- TRUE } attr(res, "labels") <- x if (fwei) attr(res, "weights") <- weights From 942175fe7c8146337560ba1c047ed3ef7e7a4dfd Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 27 May 2024 09:58:44 +0200 Subject: [PATCH 200/216] return x invisible in densiTree --- R/Densi.R | 3 ++- man/densiTree.Rd | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/R/Densi.R b/R/Densi.R index b566d846..034c04f7 100644 --- a/R/Densi.R +++ b/R/Densi.R @@ -95,7 +95,7 @@ add_tiplabels <- function(xy, tip.label, direction, adj, font, srt = 0, cex = 1, #' @param xlim the x limits of the plot. #' @param ylim the y limits of the plot. #' @param \dots further arguments to be passed to plot. -#' @returns \code{densiTree} returns silently a list with graphics parameter. +#' @returns \code{densiTree} returns silently x. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{plot.phylo}}, \code{\link{plot.networx}}, #' \code{\link{jitter}}, \code{\link{rtt}} @@ -278,4 +278,5 @@ densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), #root.time = x$root.time, align.tip.label = align.tip.label ) assign("last_plot.phylo", L, envir = .PlotPhyloEnv) + invisible(x) } diff --git a/man/densiTree.Rd b/man/densiTree.Rd index ec0bbe7f..ca8b42d1 100644 --- a/man/densiTree.Rd +++ b/man/densiTree.Rd @@ -71,7 +71,7 @@ jitter and random or equally spaced (see details below)} \item{\dots}{further arguments to be passed to plot.} } \value{ -\code{densiTree} returns silently a list with graphics parameter. +\code{densiTree} returns silently x. } \description{ An R function to plot trees similar to those produced by DensiTree. From 6ffc11ec77b904bb23bb6edba100d9c6b99e91e4 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Sat, 1 Jun 2024 07:36:48 +0200 Subject: [PATCH 201/216] reorganize networx code --- R/networx.R | 578 ----------------------------------------------- R/plot_networx.R | 575 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 575 insertions(+), 578 deletions(-) create mode 100644 R/plot_networx.R diff --git a/R/networx.R b/R/networx.R index ff2c874b..f2784354 100644 --- a/R/networx.R +++ b/R/networx.R @@ -439,581 +439,3 @@ reorder.networx <- function(x, order = "cladewise", index.only = FALSE, ...) { attr(x, "order") <- order x } - - -# some trigonemetric functions -rad2deg <- function(rad) (rad * 180) / (pi) -deg2rad <- function(deg) (deg * pi) / (180) - -# circular mean -# https://en.wikipedia.org/wiki/Mean_of_circular_quantities -circ.mean <- function(deg) { - rad.m <- (deg * pi) / (180) - mean.cos <- mean(cos(rad.m)) - mean.sin <- mean(sin(rad.m)) - - theta <- rad2deg(atan(mean.sin / mean.cos)) - if (mean.cos < 0) theta <- theta + 180 - if ((mean.sin < 0) & (mean.cos > 0)) theta <- theta + 360 - theta -} - - -spl2angle <- function(x) { - l <- length(attr(x, "labels")) - ord <- 1:l - if (!is.null(attr(x, "cycle"))) ord <- attr(x, "cycle") - x <- changeOrder(x, attr(x, "labels")[ord]) - y <- lapply(x, function(x, l) (x - 1) / l * 360, l = l) - angle <- vapply(y, circ.mean, 0) |> deg2rad() - # angle <- ((vapply(x, sum, 0) / lengths(x) - 1) / l ) * 2*pi - # kreis2kart(attr(x, "weight"), angle) - angle -} - - -coords.equal.angle <- function(obj) { - if (is.null(attr(obj, "order")) || (attr(obj, "order") == "postorder")) - obj <- reorder.networx(obj) - spl <- obj$splits - spl <- SHORTwise(spl) #, length(obj$tip.label)) - l <- length(obj$edge.length) -# ind1 <- which(!duplicated(obj$splitIndex)) - n <- max(obj$edge) - angle <- spl2angle(spl) - weight <- attr(spl, "weight") - k <- matrix(0, max(obj$splitIndex), 2) - - res <- matrix(0, max(obj$edge), 2) - for (i in 1:l) { # unique(obj$splitIndex) - j <- obj$edge[i, 1] - m <- obj$edge[i, 2] - p <- obj$splitIndex[i] - res[m, ] <- res[j, ] + kreis2kart(weight[p], angle[p]) - } - res -} - - -#' @rdname phangorn-internal -#' @export -coords <- function(obj, dim = "3D") { - # if(is.null(attr(obj,"order")) || (attr(obj, "order")=="postorder") ) - # obj = reorder.networx(obj) - if (dim == "equal_angle") return(coords.equal.angle(obj)) - - l <- length(obj$edge.length) - ind1 <- which(!duplicated(obj$splitIndex)) - - n <- max(obj$edge) - adj <- spMatrix(n, n, i = obj$edge[, 2], j = obj$edge[, 1], - x = rep(1, length(obj$edge.length))) - g <- graph_from_adjacency_matrix(adj, "undirected") - if (dim == "3D") { - coord <- layout_nicely(g, dim = 3) - k <- matrix(0, max(obj$splitIndex), 3) - for (i in ind1) { - tmp <- coord[obj$edge[i, 2], ] - coord[obj$edge[i, 1], ] - k[obj$splitIndex[i], ] <- kart2kugel(tmp[1], tmp[2], tmp[3]) - } - k[obj$splitIndex[ind1], 1] <- obj$edge.length[ind1] - - res <- matrix(0, vcount(g), 3) - for (i in 1:l) { - j <- obj$edge[i, 1] - m <- obj$edge[i, 2] - p <- obj$splitIndex[i] - res[m, ] <- res[j, ] + kugel2kart(k[p, 1], k[p, 2], k[p, 3]) - } - } - else { - coord <- layout_nicely(g, dim = 2) - k <- matrix(0, max(obj$splitIndex), 2) - tmp <- coord[obj$edge[ind1, 2], ] - coord[obj$edge[ind1, 1], ] - k[obj$splitIndex[ind1], ] <- kart2kreis(tmp[,1], tmp[,2]) -# for (i in ind1) { -# tmp <- coord[obj$edge[i, 2], ] - coord[obj$edge[i, 1], ] -# k[obj$splitIndex[i], ] <- kart2kreis(tmp[1], tmp[2]) -# } - k[obj$splitIndex[ind1], 1] <- obj$edge.length[ind1] - res <- matrix(0, vcount(g), 2) - for (i in 1:l) { - j <- obj$edge[i, 1] - m <- obj$edge[i, 2] - p <- obj$splitIndex[i] - res[m, ] <- res[j, ] + kreis2kart(k[p, 1], k[p, 2]) - } - } - res -} - - -kart2kugel <- function(x, y, z) { - r <- sqrt(x * x + y * y + z * z) - alpha <- atan(sqrt(x * x + y * y) / z) - if (z < 0) alpha <- alpha + pi - beta <- atan(y / x) - if (x < 0) beta <- beta + pi - c(r, alpha, beta) -} - - -kart2kreis <- function(x, y) { - r <- sqrt(x * x + y * y) - alpha <- atan(y / x) - #if (x < 0) alpha <- alpha + pi - if (any(x < 0)) alpha[x < 0] <- alpha[x < 0] + pi - cbind(r, alpha) -} - - -kreis2kart <- function(r, alpha) { - c(r * cos(alpha), r * sin(alpha)) - # if(length(r)>1) return(matrix(c(r*cos(alpha), r*sin(alpha)), ncol=2)) - # else return(c(r*cos(alpha), r*sin(alpha))) -} - - -kugel2kart <- function(r, alpha, beta) { - x <- r * sin(alpha) * cos(beta) - y <- r * sin(alpha) * sin(beta) - z <- r * cos(alpha) - c(x, y, z) -} - - -edgeLabels <- function(xx, yy, zz = NULL, edge) { - XX <- (xx[edge[, 1]] + xx[edge[, 2]]) / 2 - YY <- (yy[edge[, 1]] + yy[edge[, 2]]) / 2 - if (!is.null(zz)) { - ZZ <- (zz[edge[, 1]] + zz[edge[, 2]]) / 2 - return(cbind(XX, YY, ZZ)) - } - cbind(XX, YY) -} - - -rotate_matrix <- function(x, theta){ - rot_matrix <- matrix(c(cos(theta), sin(theta), -sin(theta), cos(theta)), - 2, 2, byrow = TRUE) - x %*% rot_matrix -} - - -#' plot phylogenetic networks -#' -#' So far not all parameters behave the same on the the \code{rgl} \code{"3D"} -#' and basic graphic \code{"2D"} device. -#' -#' Often it is easier and safer to supply vectors of graphical parameters for -#' splits (e.g. splits.color) than for edges. These overwrite values edge.color. -#' -#' @param x an object of class \code{"networx"} -#' @param type "3D" to plot using rgl or "equal angle" and "2D" in the normal -#' device. -#' @param use.edge.length a logical indicating whether to use the edge weights -#' of the network to draw the branches (the default) or not. -#' @param show.tip.label a logical indicating whether to show the tip labels on -#' the graph (defaults to \code{TRUE}, i.e. the labels are shown). -#' @param show.edge.label a logical indicating whether to show the tip labels -#' on the graph. -#' @param edge.label an additional vector of edge labels (normally not needed). -#' @param show.node.label a logical indicating whether to show the node labels -#' (see example). -#' @param node.label an additional vector of node labels (normally not needed). -#' @param show.nodes a logical indicating whether to show the nodes (see -#' example). -#' @param tip.color the colors used for the tip labels. -#' @param edge.color the colors used to draw edges. -#' @param edge.width the width used to draw edges. -#' @param edge.lty a vector of line types. -#' @param split.color the colors used to draw edges. -#' @param split.width the width used to draw edges. -#' @param split.lty a vector of line types. -#' @param font an integer specifying the type of font for the labels: 1 (plain -#' text), 2 (bold), 3 (italic, the default), or 4 (bold italic). -#' @param cex a numeric value giving the factor scaling of the labels. -#' @param cex.node.label a numeric value giving the factor scaling of the node -#' labels. -#' @param cex.edge.label a numeric value giving the factor scaling of the edge -#' labels. -#' @param col.node.label the colors used for the node labels. -#' @param col.edge.label the colors used for the edge labels. -#' @param font.node.label the font used for the node labels. -#' @param font.edge.label the font used for the edge labels. -#' @param underscore a logical specifying whether the underscores in tip labels -#' should be written as spaces (the default) or left as are (if TRUE). -#' @param angle rotate the plot. -#' @param digits if edge labels are numerical a positive integer indicating how -#' many significant digits are to be used. -#' @param \dots Further arguments passed to or from other methods. -#' @returns \code{plot.networx} returns invisibly a list with paramters of the -#' plot. -#' @rdname plot.networx -#' @note The internal representation is likely to change. -#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{consensusNet}}, \code{\link{neighborNet}}, -#' \code{\link{splitsNetwork}}, \code{\link{hadamard}}, -#' \code{\link{distanceHadamard}}, \code{\link{as.networx}}, -#' \code{\link[ape]{evonet}}, \code{\link[ape]{as.phylo}}, -#' \code{\link{densiTree}}, \code{\link[ape]{nodelabels}} -#' @references Dress, A.W.M. and Huson, D.H. (2004) Constructing Splits Graphs -#' \emph{IEEE/ACM Transactions on Computational Biology and Bioinformatics -#' (TCBB)}, \bold{1(3)}, 109--115 -#' -#' Schliep, K., Potts, A. J., Morrison, D. A. and Grimm, G. W. (2017), -#' Intertwining phylogenetic trees and networks. \emph{Methods Ecol Evol}. -#' \bold{8}, 1212--1220. doi:10.1111/2041-210X.12760 -#' @keywords plot -#' @importFrom igraph make_graph -#' @examples -#' -#' set.seed(1) -#' tree1 <- rtree(20, rooted=FALSE) -#' sp <- as.splits(rNNI(tree1, n=10)) -#' net <- as.networx(sp) -#' plot(net) -#' plot(net, direction="axial") -#' \dontrun{ -#' # also see example in consensusNet -#' example(consensusNet) -#' } -#' @importFrom igraph graph_from_adjacency_matrix vcount topo_sort layout_nicely -#' @method plot networx -#' @export -plot.networx <- function(x, type = "equal angle", use.edge.length = TRUE, - show.tip.label = TRUE, show.edge.label = FALSE, - edge.label = NULL, show.node.label = FALSE, - node.label = NULL, show.nodes = FALSE, - tip.color = "black", edge.color = "black", - edge.width = 3, edge.lty = 1, split.color = NULL, - split.width = NULL, split.lty = NULL, font = 3, - cex = par("cex"), cex.node.label = cex, - cex.edge.label = cex, col.node.label = tip.color, - col.edge.label = tip.color, font.node.label = font, - font.edge.label = font, underscore = FALSE, - angle=0, digits=3, ...) { - type <- match.arg(type, c("equal angle", "3D", "2D")) - if (use.edge.length == FALSE){ - x$edge.length[] <- 1 - attr(x$splits, "weight") <- rep(1, length(x$splits)) - } - nTips <- length(x$tip.label) - conf <- attr(x$splits, "confidences") - index <- x$splitIndex - if(!is.null(edge.label) && is.numeric(edge.label)) edge.label <- prettyNum(edge.label) - if (is.null(edge.label) && !is.null(conf)) { - conf <- conf[index] - if(is.numeric(conf)) conf <- prettyNum(format(conf, digits=digits)) - if (!is.null(x$translate)) conf[match(x$translate$node, x$edge[, 2])] <- "" - else conf[x$edge[, 2] <= nTips] <- "" - edge.label <- conf - } - if (is.null(node.label)) node.label <- as.character(1:max(x$edge)) - if (show.tip.label) node.label[1:nTips] <- "" - if (show.tip.label){ - if (is.expression(x$tip.label)) underscore <- TRUE - if (!underscore) x$tip.label <- gsub("_", " ", x$tip.label) - } - - lspl <- max(x$splitIndex) - if (!is.null(split.color)) { - if (length(split.color) != lspl) - stop("split.color must be same length as splits") - else edge.color <- split.color[x$splitIndex] - } - if (!is.null(split.width)) { - if (length(split.width) != lspl) - stop("split.color must be same length as splits") - else edge.width <- split.width[x$splitIndex] - } - if (!is.null(split.lty)) { - if (length(split.lty) != lspl) - stop("split.color must be same length as splits") - else edge.lty <- split.lty[x$splitIndex] - } - - chk <- FALSE - - if (type == "3D") chk <- requireNamespace("rgl", quietly = TRUE) - if (!chk && type == "3D") { - warning("type='3D' requires the package 'rgl', plotting in '2D' instead!\n") - type <- "2D" - } - # use precomputed vertices when available - coord <- NULL - if (!is.null(x$.plot)) coord <- x$.plot$vertices - - if (type == "3D") { - if (is.null(coord) || ncol(coord) != 3) - coord <- coords(x, dim = "3D") - plotRGL(coord, x, show.tip.label = show.tip.label, - show.edge.label = show.edge.label, edge.label = edge.label, - show.node.label = show.node.label, node.label = node.label, - show.nodes = show.nodes, tip.color = tip.color, edge.color = edge.color, - edge.width = edge.width, font = font, cex = cex, - cex.node.label = cex.node.label, cex.edge.label = cex.edge.label, - col.node.label = col.node.label, col.edge.label = col.edge.label, - font.node.label = font.node.label, font.edge.label = font.edge.label) - } - else { - if (is.null(coord) || ncol(coord) != 2) { - if (type == "equal angle") coord <- coords.equal.angle(x) - else coord <- coords(x, dim = "2D") - } - if(angle != 0){ - angle <- angle * pi/180 # - coord <- rotate_matrix(coord, angle) - } - plot2D(coord, x, show.tip.label = show.tip.label, - show.edge.label = show.edge.label, edge.label = edge.label, - show.node.label = show.node.label, node.label = node.label, - show.nodes = show.nodes, tip.color = tip.color, edge.color = edge.color, - edge.width = edge.width, edge.lty = edge.lty, font = font, cex = cex, - cex.node.label = cex.node.label, cex.edge.label = cex.edge.label, - col.node.label = col.node.label, col.edge.label = col.edge.label, - font.node.label = font.node.label, font.edge.label = font.edge.label, - add = FALSE, ...) - } - x$.plot <- list(vertices = coord, edge.color = edge.color, - edge.width = edge.width, edge.lty = edge.lty) - invisible(x) -} - - -plotRGL <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, - edge.label = NULL, show.node.label = FALSE, - node.label = NULL, show.nodes = FALSE, tip.color = "blue", - edge.color = "grey", edge.width = 3, font = 3, - cex = par("cex"), cex.node.label = cex, - cex.edge.label = cex, col.node.label = tip.color, - col.edge.label = tip.color, font.node.label = font, - font.edge.label = font, ...) { - open3d <- rgl::open3d - segments3d <- rgl::segments3d - spheres3d <- rgl::spheres3d - texts3d <- rgl::texts3d - - edge <- net$edge - - x <- coords[, 1] - y <- coords[, 2] - z <- coords[, 3] - - nTips <- length(net$tip.label) - - segments3d(x[t(edge)], y[t(edge)], z[t(edge)], - col = rep(edge.color, each = 2), lwd = edge.width) - radius <- 0 - if (show.nodes) { - radius <- sqrt( (max(x) - min(x))^2 + (max(y) - min(y))^2 + - (max(z) - min(z))^2) / 200 - spheres3d(x[1:nTips], y[1:nTips], z[1:nTips], radius = 2 * radius, - color = "cyan") - spheres3d(x[-c(1:nTips)], y[-c(1:nTips)], z[-c(1:nTips)], radius = radius, - color = "magenta") - } - if (show.tip.label) { - if (is.null(net$translate)) - texts3d(x[1:nTips] + 2.05 * radius, y[1:nTips], z[1:nTips], - net$tip.label, color = tip.color, cex = cex, font = font) - else - texts3d(x[net$translate$node] + 2.05 * radius, y[net$translate$node], - z[net$translate$node], net$tip.label, color = tip.color, cex = cex, - font = font) - } - if (show.edge.label) { - ec <- edgeLabels(x, y, z, edge) - if (is.null(edge.label)) edge.label <- net$splitIndex - # else edge.label = net$splitIndex - texts3d(ec[, 1], ec[, 2], ec[, 3], edge.label, color = col.edge.label, - cex = cex.edge.label, font = font.edge.label) - } - if (show.node.label) { - texts3d(x, y, z, node.label, color = col.node.label, cex = cex.node.label, - font = font.node.label) - } -} - - -plot2D <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, - edge.label = NULL, show.node.label = FALSE, - node.label = NULL, tip.color = "blue", edge.color = "grey", - edge.width = 3, edge.lty = 1, font = 3, cex = par("cex"), - cex.node.label = cex, cex.edge.label = cex, - col.node.label = tip.color, col.edge.label = tip.color, - font.node.label = font, font.edge.label = font, - add = FALSE, direction="horizontal", xlim=NULL, ylim=NULL, - ...) { - edge <- net$edge - label <- net$tip.label - xx <- coords[, 1] - yy <- coords[, 2] - nTips <- length(label) - if(is.null(xlim)){ - xlim <- range(xx) - offset <- max(nchar(label)) * 0.018 * cex * diff(xlim) - if (show.tip.label) xlim <- c(xlim[1] - offset, xlim[2] + offset) - } - if(is.null(ylim)){ - ylim <- range(yy) - if (show.tip.label) ylim <- c(ylim[1] - 0.03 * cex * diff(ylim), - ylim[2] + 0.03 * cex * diff(ylim)) - } - direction <- match.arg(direction, c("horizontal", "axial")) -# if (show.tip.label) { -# offset <- max(nchar(label)) * 0.018 * cex * diff(xlim) -# xlim <- c(xlim[1] - offset, xlim[2] + offset) -# ylim <- c(ylim[1] - 0.03 * cex * diff(ylim), ylim[2] + -# 0.03 * cex * diff(ylim)) -# } - if (!add) { - plot.new() - plot.window(xlim, ylim, asp = 1) - } - cladogram.plot(edge, xx, yy, edge.color, edge.width, edge.lty) - if (show.tip.label) { - if (is.null(net$translate)) ind <- match(1:nTips, edge[, 2]) - else ind <- match(net$translate$node, edge[, 2]) - if(direction=="horizontal"){ - pos <- rep(4, nTips) - XX <- xx[edge[ind, 1]] - xx[edge[ind, 2]] - pos[XX > 0] <- 2 - YY <- yy[edge[ind, 1]] - yy[edge[ind, 2]] - pos2 <- rep(3, nTips) - pos2[YY > 0] <- 1 - # needed if tiplabels are not at internal nodes - XX[is.na(XX)] <- 0 - YY[is.na(YY)] <- 0 - pos[abs(YY) > abs(XX)] <- pos2[abs(YY) > abs(XX)] - if (is.null(net$translate)) text(xx[1:nTips], yy[1:nTips], labels = label, - pos = pos, col = tip.color, cex = cex, font = font) - else text(xx[net$translate$node], yy[net$translate$node], labels = label, - pos = pos, col = tip.color, cex = cex, font = font) - } - else { - XX <- xx[edge[ind, 2]] - xx[edge[ind, 1]] - YY <- yy[edge[ind, 2]] - yy[edge[ind, 1]] - angle <- kart2kreis(XX, YY)[,2] - adj <- abs(angle) > pi/2 - angle <- angle * 180/pi # switch to degrees - angle[adj] <- angle[adj] - 180 - adj <- as.numeric(adj) - ## `srt' takes only a single value, so can't vectorize this: - ## (and need to 'elongate' these vectors:) - font <- rep(font, length.out = nTips) - tip.color <- rep(tip.color, length.out = nTips) - cex <- rep(cex, length.out = nTips) - for (i in seq_along(label)) - text(xx[i], yy[i], label[i], font = font[i], - cex = cex[i], srt = angle[i], adj = adj[i], - col = tip.color[i]) - } - } - - if (show.edge.label) { - ec <- edgeLabels(xx, yy, edge = edge) - if (is.null(edge.label)) edge.label <- net$splitIndex - - # show only one edge label - em <- apply(ec, 1, function(x) max(abs(x))) - si <- net$splitIndex - for (i in unique(si)) { - tmp <- si == i - if (sum(tmp) > 1) { - w <- which(tmp) - wm <- which.max(em[w]) - edge.label[w[-wm]] <- "" - } - } - - text(ec[, 1], ec[, 2], labels = edge.label, col = col.edge.label, - cex = cex.edge.label, font = font.edge.label) - } - if (show.node.label) { - text(xx, yy, labels = node.label, col = col.node.label, - cex = cex.node.label, font = font.node.label) - } - PP <- list(Ntip = nTips, type = "networx", edge = net$edge, xx = coords[, 1], - yy = coords[, 2], x.lim=xlim, y.lim=ylim) - assign("last_plot.phylo", PP, envir = .PlotPhyloEnv) -} - - -closest.edge <- function(x, y, P1, P2) { - x1 <- P1[, 1] - x2 <- P2[, 1] - y1 <- P1[, 2] - y2 <- P2[, 2] - - A <- sqrt( (x2 - x)^2 + (y2 - y)^2) # d_BC - B <- sqrt( (x1 - x)^2 + (y1 - y)^2) # d_AC - C <- sqrt( (x1 - x2)^2 + (y1 - y2)^2) # d_AB - # Kosinussatz - alpha <- acos( (B^2 + C^2 - A^2) / (2 * B * C)) - beta <- acos( (A^2 + C^2 - B^2) / (2 * A * C)) - - d <- abs( (y2 - y1) * x - (x2 - x1) * y + x2 * y1 - y2 * x1) / - sqrt( (y2 - y1)^2 + (x2 - x1)^2) - d[alpha > (pi / 2)] <- B[alpha > (pi / 2)] - d[beta > (pi / 2)] <- A[beta > (pi / 2)] - d -} - -closest.node <- function(x, y, P) { - x1 <- P[, 1] - y1 <- P[, 2] - d <- sqrt((x1 - x)^2 + (y1 - y)^2) - d -} - - -#' Identify splits in a network -#' -#' \code{identify.networx} reads the position of the graphics pointer when the -#' mouse button is pressed. It then returns the split belonging to the edge -#' closest to the pointer. The network must be plotted beforehand. -#' -#' @param x an object of class \code{networx} -#' @param quiet a logical controlling whether to print a message inviting the -#' user to click on the tree. -#' @param \dots further arguments to be passed to or from other methods. -#' @return \code{identify.networx} returns a splits object. -#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link[phangorn]{plot.networx}}, -#' \code{\link[graphics]{identify}} -#' @examples -#' \dontrun{ -#' data(yeast) -#' dm <- dist.ml(yeast) -#' nnet <- neighborNet(dm) -#' plot(nnet) -#' identify(nnet) # click close to an edge -#' } -#' @importFrom graphics identify -#' @method identify networx -#' @export -identify.networx <- function(x, quiet = FALSE, ...) { - if (!quiet) - cat("Click close to a node or edge of the tree...\n") - xy <- locator(1) - if (is.null(xy)) - return(NULL) - if (is.null(x$.plot)) { - lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) - edge <- lastPP$edge - xx <- lastPP$xx - yy <- lastPP$yy - vertices <- cbind(xx, yy) - } - else { - lastPP <- x$.plot - edge <- x$edge - vertices <- lastPP$vertices - } - P1 <- vertices[edge[, 1], , drop = FALSE] - P2 <- vertices[edge[, 2], , drop = FALSE] - d <- closest.edge(xy$x, xy$y, P1, P2) - split <- x$splitIndex[which.min(d)] - x$splits[split] -} diff --git a/R/plot_networx.R b/R/plot_networx.R new file mode 100644 index 00000000..70de35f9 --- /dev/null +++ b/R/plot_networx.R @@ -0,0 +1,575 @@ +# some trigonemetric functions +rad2deg <- function(rad) (rad * 180) / (pi) +deg2rad <- function(deg) (deg * pi) / (180) + +# circular mean +# https://en.wikipedia.org/wiki/Mean_of_circular_quantities +circ.mean <- function(deg) { + rad.m <- (deg * pi) / (180) + mean.cos <- mean(cos(rad.m)) + mean.sin <- mean(sin(rad.m)) + + theta <- rad2deg(atan(mean.sin / mean.cos)) + if (mean.cos < 0) theta <- theta + 180 + if ((mean.sin < 0) & (mean.cos > 0)) theta <- theta + 360 + theta +} + + +spl2angle <- function(x) { + l <- length(attr(x, "labels")) + ord <- 1:l + if (!is.null(attr(x, "cycle"))) ord <- attr(x, "cycle") + x <- changeOrder(x, attr(x, "labels")[ord]) + y <- lapply(x, function(x, l) (x - 1) / l * 360, l = l) + angle <- vapply(y, circ.mean, 0) |> deg2rad() + # angle <- ((vapply(x, sum, 0) / lengths(x) - 1) / l ) * 2*pi + # kreis2kart(attr(x, "weight"), angle) + angle +} + + +coords.equal.angle <- function(obj) { + if (is.null(attr(obj, "order")) || (attr(obj, "order") == "postorder")) + obj <- reorder.networx(obj) + spl <- obj$splits + spl <- SHORTwise(spl) #, length(obj$tip.label)) + l <- length(obj$edge.length) + # ind1 <- which(!duplicated(obj$splitIndex)) + n <- max(obj$edge) + angle <- spl2angle(spl) + weight <- attr(spl, "weight") + k <- matrix(0, max(obj$splitIndex), 2) + + res <- matrix(0, max(obj$edge), 2) + for (i in 1:l) { # unique(obj$splitIndex) + j <- obj$edge[i, 1] + m <- obj$edge[i, 2] + p <- obj$splitIndex[i] + res[m, ] <- res[j, ] + kreis2kart(weight[p], angle[p]) + } + res +} + + +#' @rdname phangorn-internal +#' @export +coords <- function(obj, dim = "3D") { + # if(is.null(attr(obj,"order")) || (attr(obj, "order")=="postorder") ) + # obj = reorder.networx(obj) + if (dim == "equal_angle") return(coords.equal.angle(obj)) + + l <- length(obj$edge.length) + ind1 <- which(!duplicated(obj$splitIndex)) + + n <- max(obj$edge) + adj <- spMatrix(n, n, i = obj$edge[, 2], j = obj$edge[, 1], + x = rep(1, length(obj$edge.length))) + g <- graph_from_adjacency_matrix(adj, "undirected") + if (dim == "3D") { + coord <- layout_nicely(g, dim = 3) + k <- matrix(0, max(obj$splitIndex), 3) + for (i in ind1) { + tmp <- coord[obj$edge[i, 2], ] - coord[obj$edge[i, 1], ] + k[obj$splitIndex[i], ] <- kart2kugel(tmp[1], tmp[2], tmp[3]) + } + k[obj$splitIndex[ind1], 1] <- obj$edge.length[ind1] + + res <- matrix(0, vcount(g), 3) + for (i in 1:l) { + j <- obj$edge[i, 1] + m <- obj$edge[i, 2] + p <- obj$splitIndex[i] + res[m, ] <- res[j, ] + kugel2kart(k[p, 1], k[p, 2], k[p, 3]) + } + } + else { + coord <- layout_nicely(g, dim = 2) + k <- matrix(0, max(obj$splitIndex), 2) + tmp <- coord[obj$edge[ind1, 2], ] - coord[obj$edge[ind1, 1], ] + k[obj$splitIndex[ind1], ] <- kart2kreis(tmp[,1], tmp[,2]) + # for (i in ind1) { + # tmp <- coord[obj$edge[i, 2], ] - coord[obj$edge[i, 1], ] + # k[obj$splitIndex[i], ] <- kart2kreis(tmp[1], tmp[2]) + # } + k[obj$splitIndex[ind1], 1] <- obj$edge.length[ind1] + res <- matrix(0, vcount(g), 2) + for (i in 1:l) { + j <- obj$edge[i, 1] + m <- obj$edge[i, 2] + p <- obj$splitIndex[i] + res[m, ] <- res[j, ] + kreis2kart(k[p, 1], k[p, 2]) + } + } + res +} + + +kart2kugel <- function(x, y, z) { + r <- sqrt(x * x + y * y + z * z) + alpha <- atan(sqrt(x * x + y * y) / z) + if (z < 0) alpha <- alpha + pi + beta <- atan(y / x) + if (x < 0) beta <- beta + pi + c(r, alpha, beta) +} + + +kart2kreis <- function(x, y) { + r <- sqrt(x * x + y * y) + alpha <- atan(y / x) + #if (x < 0) alpha <- alpha + pi + if (any(x < 0)) alpha[x < 0] <- alpha[x < 0] + pi + cbind(r, alpha) +} + + +kreis2kart <- function(r, alpha) { + c(r * cos(alpha), r * sin(alpha)) + # if(length(r)>1) return(matrix(c(r*cos(alpha), r*sin(alpha)), ncol=2)) + # else return(c(r*cos(alpha), r*sin(alpha))) +} + + +kugel2kart <- function(r, alpha, beta) { + x <- r * sin(alpha) * cos(beta) + y <- r * sin(alpha) * sin(beta) + z <- r * cos(alpha) + c(x, y, z) +} + + +edgeLabels <- function(xx, yy, zz = NULL, edge) { + XX <- (xx[edge[, 1]] + xx[edge[, 2]]) / 2 + YY <- (yy[edge[, 1]] + yy[edge[, 2]]) / 2 + if (!is.null(zz)) { + ZZ <- (zz[edge[, 1]] + zz[edge[, 2]]) / 2 + return(cbind(XX, YY, ZZ)) + } + cbind(XX, YY) +} + + +rotate_matrix <- function(x, theta){ + rot_matrix <- matrix(c(cos(theta), sin(theta), -sin(theta), cos(theta)), + 2, 2, byrow = TRUE) + x %*% rot_matrix +} + + +#' plot phylogenetic networks +#' +#' So far not all parameters behave the same on the the \code{rgl} \code{"3D"} +#' and basic graphic \code{"2D"} device. +#' +#' Often it is easier and safer to supply vectors of graphical parameters for +#' splits (e.g. splits.color) than for edges. These overwrite values edge.color. +#' +#' @param x an object of class \code{"networx"} +#' @param type "3D" to plot using rgl or "equal angle" and "2D" in the normal +#' device. +#' @param use.edge.length a logical indicating whether to use the edge weights +#' of the network to draw the branches (the default) or not. +#' @param show.tip.label a logical indicating whether to show the tip labels on +#' the graph (defaults to \code{TRUE}, i.e. the labels are shown). +#' @param show.edge.label a logical indicating whether to show the tip labels +#' on the graph. +#' @param edge.label an additional vector of edge labels (normally not needed). +#' @param show.node.label a logical indicating whether to show the node labels +#' (see example). +#' @param node.label an additional vector of node labels (normally not needed). +#' @param show.nodes a logical indicating whether to show the nodes (see +#' example). +#' @param tip.color the colors used for the tip labels. +#' @param edge.color the colors used to draw edges. +#' @param edge.width the width used to draw edges. +#' @param edge.lty a vector of line types. +#' @param split.color the colors used to draw edges. +#' @param split.width the width used to draw edges. +#' @param split.lty a vector of line types. +#' @param font an integer specifying the type of font for the labels: 1 (plain +#' text), 2 (bold), 3 (italic, the default), or 4 (bold italic). +#' @param cex a numeric value giving the factor scaling of the labels. +#' @param cex.node.label a numeric value giving the factor scaling of the node +#' labels. +#' @param cex.edge.label a numeric value giving the factor scaling of the edge +#' labels. +#' @param col.node.label the colors used for the node labels. +#' @param col.edge.label the colors used for the edge labels. +#' @param font.node.label the font used for the node labels. +#' @param font.edge.label the font used for the edge labels. +#' @param underscore a logical specifying whether the underscores in tip labels +#' should be written as spaces (the default) or left as are (if TRUE). +#' @param angle rotate the plot. +#' @param digits if edge labels are numerical a positive integer indicating how +#' many significant digits are to be used. +#' @param \dots Further arguments passed to or from other methods. +#' @returns \code{plot.networx} returns invisibly a list with paramters of the +#' plot. +#' @rdname plot.networx +#' @note The internal representation is likely to change. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link{consensusNet}}, \code{\link{neighborNet}}, +#' \code{\link{splitsNetwork}}, \code{\link{hadamard}}, +#' \code{\link{distanceHadamard}}, \code{\link{as.networx}}, +#' \code{\link[ape]{evonet}}, \code{\link[ape]{as.phylo}}, +#' \code{\link{densiTree}}, \code{\link[ape]{nodelabels}} +#' @references Dress, A.W.M. and Huson, D.H. (2004) Constructing Splits Graphs +#' \emph{IEEE/ACM Transactions on Computational Biology and Bioinformatics +#' (TCBB)}, \bold{1(3)}, 109--115 +#' +#' Schliep, K., Potts, A. J., Morrison, D. A. and Grimm, G. W. (2017), +#' Intertwining phylogenetic trees and networks. \emph{Methods Ecol Evol}. +#' \bold{8}, 1212--1220. doi:10.1111/2041-210X.12760 +#' @keywords plot +#' @importFrom igraph make_graph +#' @examples +#' +#' set.seed(1) +#' tree1 <- rtree(20, rooted=FALSE) +#' sp <- as.splits(rNNI(tree1, n=10)) +#' net <- as.networx(sp) +#' plot(net) +#' plot(net, direction="axial") +#' \dontrun{ +#' # also see example in consensusNet +#' example(consensusNet) +#' } +#' @importFrom igraph graph_from_adjacency_matrix vcount topo_sort layout_nicely +#' @method plot networx +#' @export +plot.networx <- function(x, type = "equal angle", use.edge.length = TRUE, + show.tip.label = TRUE, show.edge.label = FALSE, + edge.label = NULL, show.node.label = FALSE, + node.label = NULL, show.nodes = FALSE, + tip.color = "black", edge.color = "black", + edge.width = 3, edge.lty = 1, split.color = NULL, + split.width = NULL, split.lty = NULL, font = 3, + cex = par("cex"), cex.node.label = cex, + cex.edge.label = cex, col.node.label = tip.color, + col.edge.label = tip.color, font.node.label = font, + font.edge.label = font, underscore = FALSE, + angle=0, digits=3, ...) { + type <- match.arg(type, c("equal angle", "3D", "2D")) + if (use.edge.length == FALSE){ + x$edge.length[] <- 1 + attr(x$splits, "weight") <- rep(1, length(x$splits)) + } + nTips <- length(x$tip.label) + conf <- attr(x$splits, "confidences") + index <- x$splitIndex + if(!is.null(edge.label) && is.numeric(edge.label)) edge.label <- prettyNum(edge.label) + if (is.null(edge.label) && !is.null(conf)) { + conf <- conf[index] + if(is.numeric(conf)) conf <- prettyNum(format(conf, digits=digits)) + if (!is.null(x$translate)) conf[match(x$translate$node, x$edge[, 2])] <- "" + else conf[x$edge[, 2] <= nTips] <- "" + edge.label <- conf + } + if (is.null(node.label)) node.label <- as.character(1:max(x$edge)) + if (show.tip.label) node.label[1:nTips] <- "" + if (show.tip.label){ + if (is.expression(x$tip.label)) underscore <- TRUE + if (!underscore) x$tip.label <- gsub("_", " ", x$tip.label) + } + + lspl <- max(x$splitIndex) + if (!is.null(split.color)) { + if (length(split.color) != lspl) + stop("split.color must be same length as splits") + else edge.color <- split.color[x$splitIndex] + } + if (!is.null(split.width)) { + if (length(split.width) != lspl) + stop("split.color must be same length as splits") + else edge.width <- split.width[x$splitIndex] + } + if (!is.null(split.lty)) { + if (length(split.lty) != lspl) + stop("split.color must be same length as splits") + else edge.lty <- split.lty[x$splitIndex] + } + + chk <- FALSE + + if (type == "3D") chk <- requireNamespace("rgl", quietly = TRUE) + if (!chk && type == "3D") { + warning("type='3D' requires the package 'rgl', plotting in '2D' instead!\n") + type <- "2D" + } + # use precomputed vertices when available + coord <- NULL + if (!is.null(x$.plot)) coord <- x$.plot$vertices + + if (type == "3D") { + if (is.null(coord) || ncol(coord) != 3) + coord <- coords(x, dim = "3D") + plotRGL(coord, x, show.tip.label = show.tip.label, + show.edge.label = show.edge.label, edge.label = edge.label, + show.node.label = show.node.label, node.label = node.label, + show.nodes = show.nodes, tip.color = tip.color, edge.color = edge.color, + edge.width = edge.width, font = font, cex = cex, + cex.node.label = cex.node.label, cex.edge.label = cex.edge.label, + col.node.label = col.node.label, col.edge.label = col.edge.label, + font.node.label = font.node.label, font.edge.label = font.edge.label) + } + else { + if (is.null(coord) || ncol(coord) != 2) { + if (type == "equal angle") coord <- coords.equal.angle(x) + else coord <- coords(x, dim = "2D") + } + if(angle != 0){ + angle <- angle * pi/180 # + coord <- rotate_matrix(coord, angle) + } + plot2D(coord, x, show.tip.label = show.tip.label, + show.edge.label = show.edge.label, edge.label = edge.label, + show.node.label = show.node.label, node.label = node.label, + show.nodes = show.nodes, tip.color = tip.color, edge.color = edge.color, + edge.width = edge.width, edge.lty = edge.lty, font = font, cex = cex, + cex.node.label = cex.node.label, cex.edge.label = cex.edge.label, + col.node.label = col.node.label, col.edge.label = col.edge.label, + font.node.label = font.node.label, font.edge.label = font.edge.label, + add = FALSE, ...) + } + x$.plot <- list(vertices = coord, edge.color = edge.color, + edge.width = edge.width, edge.lty = edge.lty) + invisible(x) +} + + +plotRGL <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, + edge.label = NULL, show.node.label = FALSE, + node.label = NULL, show.nodes = FALSE, tip.color = "blue", + edge.color = "grey", edge.width = 3, font = 3, + cex = par("cex"), cex.node.label = cex, + cex.edge.label = cex, col.node.label = tip.color, + col.edge.label = tip.color, font.node.label = font, + font.edge.label = font, ...) { + open3d <- rgl::open3d + segments3d <- rgl::segments3d + spheres3d <- rgl::spheres3d + texts3d <- rgl::texts3d + + edge <- net$edge + + x <- coords[, 1] + y <- coords[, 2] + z <- coords[, 3] + + nTips <- length(net$tip.label) + + segments3d(x[t(edge)], y[t(edge)], z[t(edge)], + col = rep(edge.color, each = 2), lwd = edge.width) + radius <- 0 + if (show.nodes) { + radius <- sqrt( (max(x) - min(x))^2 + (max(y) - min(y))^2 + + (max(z) - min(z))^2) / 200 + spheres3d(x[1:nTips], y[1:nTips], z[1:nTips], radius = 2 * radius, + color = "cyan") + spheres3d(x[-c(1:nTips)], y[-c(1:nTips)], z[-c(1:nTips)], radius = radius, + color = "magenta") + } + if (show.tip.label) { + if (is.null(net$translate)) + texts3d(x[1:nTips] + 2.05 * radius, y[1:nTips], z[1:nTips], + net$tip.label, color = tip.color, cex = cex, font = font) + else + texts3d(x[net$translate$node] + 2.05 * radius, y[net$translate$node], + z[net$translate$node], net$tip.label, color = tip.color, cex = cex, + font = font) + } + if (show.edge.label) { + ec <- edgeLabels(x, y, z, edge) + if (is.null(edge.label)) edge.label <- net$splitIndex + # else edge.label = net$splitIndex + texts3d(ec[, 1], ec[, 2], ec[, 3], edge.label, color = col.edge.label, + cex = cex.edge.label, font = font.edge.label) + } + if (show.node.label) { + texts3d(x, y, z, node.label, color = col.node.label, cex = cex.node.label, + font = font.node.label) + } +} + + +plot2D <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, + edge.label = NULL, show.node.label = FALSE, + node.label = NULL, tip.color = "blue", edge.color = "grey", + edge.width = 3, edge.lty = 1, font = 3, cex = par("cex"), + cex.node.label = cex, cex.edge.label = cex, + col.node.label = tip.color, col.edge.label = tip.color, + font.node.label = font, font.edge.label = font, + add = FALSE, direction="horizontal", xlim=NULL, ylim=NULL, + ...) { + direction <- match.arg(direction, c("horizontal", "axial")) + edge <- net$edge + label <- net$tip.label + xx <- coords[, 1] + yy <- coords[, 2] + nTips <- length(label) + if(is.null(xlim)){ + xlim <- range(xx) + offset <- max(nchar(label)) * 0.018 * cex * diff(xlim) + if (show.tip.label) xlim <- c(xlim[1] - offset, xlim[2] + offset) + } + if(is.null(ylim)){ + ylim <- range(yy) + if (show.tip.label){ + if(direction=="axial"){ + offset <- max(nchar(label)) * 0.018 * cex * diff(ylim) + ylim <- c(ylim[1] - offset, ylim[2] + offset) + } else ylim <- c(ylim[1] - 0.03 * cex * diff(ylim), + ylim[2] + 0.03 * cex * diff(ylim)) + } + } + if (!add) { + plot.new() + plot.window(xlim, ylim, asp = 1) + } + cladogram.plot(edge, xx, yy, edge.color, edge.width, edge.lty) + if (show.tip.label) { + if (is.null(net$translate)) ind <- match(1:nTips, edge[, 2]) + else ind <- match(net$translate$node, edge[, 2]) + if(direction=="horizontal"){ + pos <- rep(4, nTips) + XX <- xx[edge[ind, 1]] - xx[edge[ind, 2]] + pos[XX > 0] <- 2 + YY <- yy[edge[ind, 1]] - yy[edge[ind, 2]] + pos2 <- rep(3, nTips) + pos2[YY > 0] <- 1 + # needed if tiplabels are not at internal nodes + XX[is.na(XX)] <- 0 + YY[is.na(YY)] <- 0 + pos[abs(YY) > abs(XX)] <- pos2[abs(YY) > abs(XX)] + if (is.null(net$translate)) text(xx[1:nTips], yy[1:nTips], labels = label, + pos = pos, col = tip.color, cex = cex, font = font) + else text(xx[net$translate$node], yy[net$translate$node], labels = label, + pos = pos, col = tip.color, cex = cex, font = font) + } + else { + XX <- xx[edge[ind, 2]] - xx[edge[ind, 1]] + YY <- yy[edge[ind, 2]] - yy[edge[ind, 1]] + angle <- kart2kreis(XX, YY)[,2] + adj <- abs(angle) > pi/2 + angle <- angle * 180/pi # switch to degrees + angle[adj] <- angle[adj] - 180 + adj <- as.numeric(adj) + ## `srt' takes only a single value, so can't vectorize this: + ## (and need to 'elongate' these vectors:) + font <- rep(font, length.out = nTips) + tip.color <- rep(tip.color, length.out = nTips) + cex <- rep(cex, length.out = nTips) + for (i in seq_along(label)) + text(xx[i], yy[i], label[i], font = font[i], + cex = cex[i], srt = angle[i], adj = adj[i], + col = tip.color[i]) + } + } + + if (show.edge.label) { + ec <- edgeLabels(xx, yy, edge = edge) + if (is.null(edge.label)) edge.label <- net$splitIndex + + # show only one edge label + em <- apply(ec, 1, function(x) max(abs(x))) + si <- net$splitIndex + for (i in unique(si)) { + tmp <- si == i + if (sum(tmp) > 1) { + w <- which(tmp) + wm <- which.max(em[w]) + edge.label[w[-wm]] <- "" + } + } + + text(ec[, 1], ec[, 2], labels = edge.label, col = col.edge.label, + cex = cex.edge.label, font = font.edge.label) + } + if (show.node.label) { + text(xx, yy, labels = node.label, col = col.node.label, + cex = cex.node.label, font = font.node.label) + } + PP <- list(Ntip = nTips, type = "networx", edge = net$edge, xx = coords[, 1], + yy = coords[, 2], x.lim=xlim, y.lim=ylim, align.tip.label=FALSE) + assign("last_plot.phylo", PP, envir = .PlotPhyloEnv) +} + + +closest.edge <- function(x, y, P1, P2) { + x1 <- P1[, 1] + x2 <- P2[, 1] + y1 <- P1[, 2] + y2 <- P2[, 2] + + A <- sqrt( (x2 - x)^2 + (y2 - y)^2) # d_BC + B <- sqrt( (x1 - x)^2 + (y1 - y)^2) # d_AC + C <- sqrt( (x1 - x2)^2 + (y1 - y2)^2) # d_AB + # Kosinussatz + alpha <- acos( (B^2 + C^2 - A^2) / (2 * B * C)) + beta <- acos( (A^2 + C^2 - B^2) / (2 * A * C)) + + d <- abs( (y2 - y1) * x - (x2 - x1) * y + x2 * y1 - y2 * x1) / + sqrt( (y2 - y1)^2 + (x2 - x1)^2) + d[alpha > (pi / 2)] <- B[alpha > (pi / 2)] + d[beta > (pi / 2)] <- A[beta > (pi / 2)] + d +} + +closest.node <- function(x, y, P) { + x1 <- P[, 1] + y1 <- P[, 2] + d <- sqrt((x1 - x)^2 + (y1 - y)^2) + d +} + + +#' Identify splits in a network +#' +#' \code{identify.networx} reads the position of the graphics pointer when the +#' mouse button is pressed. It then returns the split belonging to the edge +#' closest to the pointer. The network must be plotted beforehand. +#' +#' @param x an object of class \code{networx} +#' @param quiet a logical controlling whether to print a message inviting the +#' user to click on the tree. +#' @param \dots further arguments to be passed to or from other methods. +#' @return \code{identify.networx} returns a splits object. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link[phangorn]{plot.networx}}, +#' \code{\link[graphics]{identify}} +#' @examples +#' \dontrun{ +#' data(yeast) +#' dm <- dist.ml(yeast) +#' nnet <- neighborNet(dm) +#' plot(nnet) +#' identify(nnet) # click close to an edge +#' } +#' @importFrom graphics identify +#' @method identify networx +#' @export +identify.networx <- function(x, quiet = FALSE, ...) { + if (!quiet) + cat("Click close to a node or edge of the tree...\n") + xy <- locator(1) + if (is.null(xy)) + return(NULL) + if (is.null(x$.plot)) { + lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) + edge <- lastPP$edge + xx <- lastPP$xx + yy <- lastPP$yy + vertices <- cbind(xx, yy) + } + else { + lastPP <- x$.plot + edge <- x$edge + vertices <- lastPP$vertices + } + P1 <- vertices[edge[, 1], , drop = FALSE] + P2 <- vertices[edge[, 2], , drop = FALSE] + d <- closest.edge(xy$x, xy$y, P1, P2) + split <- x$splitIndex[which.min(d)] + x$splits[split] +} From 54a4ee5037ff45d93ccd86e2477926ac811e5b1d Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Sat, 1 Jun 2024 07:38:08 +0200 Subject: [PATCH 202/216] add rate to print output --- R/pml_generics.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/pml_generics.R b/R/pml_generics.R index 1d5d92ac..8af7b2b7 100644 --- a/R/pml_generics.R +++ b/R/pml_generics.R @@ -122,6 +122,7 @@ print.pml <- function(x, ...) { names(bf) <- levels print(bf) #cat(bf, "\n") } + if(!isTRUE(all.equal(x$rate, 1))) cat("\nRate:", x$rate, "\n") } From c81e9688e45af47ce93d3bccf0f8f74ee49d340b Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Sat, 1 Jun 2024 07:38:46 +0200 Subject: [PATCH 203/216] change defaults and experiment with early ... --- R/Densi.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Densi.R b/R/Densi.R index 034c04f7..8708fe78 100644 --- a/R/Densi.R +++ b/R/Densi.R @@ -127,13 +127,13 @@ add_tiplabels <- function(xy, tip.label, direction, adj, font, srt = 0, cex = 1, #' } #' #' @export -densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), +densiTree <- function(x, type = "phylogram", ..., alpha = 1 / length(x), consensus = NULL, direction = "rightwards", optim = FALSE, scaleX = FALSE, col = 1, width = 1, lty = 1, cex = .8, font = 3, tip.color = 1, adj = 0, srt = 0, underscore = FALSE, label.offset = 0, scale.bar = TRUE, jitter = list(amount = 0, random = TRUE), tip.dates=NULL, - xlim=NULL, ylim=NULL, ...) { + xlim=NULL, ylim=NULL) { if (!inherits(x, "multiPhylo")) stop("x must be of class multiPhylo") if (is.character(consensus)) { consensus <- stree(length(consensus), tip.label = consensus) From 57141b09b93602dc920f6ca18e6b90085e517389 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Thu, 6 Jun 2024 12:19:42 +0200 Subject: [PATCH 204/216] improve ancestral (when reading ASR from iqtree), update man pages --- R/ancestral.R | 3 +-- R/plotAnc.R | 19 +++++++++++++------ man/densiTree.Rd | 16 ++++++++-------- man/identify.networx.Rd | 2 +- man/phangorn-internal.Rd | 9 +++++---- man/plot.ancestral.Rd | 8 ++++---- man/plot.networx.Rd | 2 +- 7 files changed, 33 insertions(+), 26 deletions(-) diff --git a/R/ancestral.R b/R/ancestral.R index be45cb6e..77606c01 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -179,8 +179,7 @@ ancestral.pml <- function(object, type = "marginal", ...) { #' #' \code{write.ancestral} allows to export ancestral reconstructions. It writes #' out the tree, a tab delimited text file with the probabilities and the -#' alignment. \code{read.ancestral} tries to read in these files. -#' \code{ancestral} generates an object of class ancestral. +#' alignment. \code{ancestral} generates an object of class ancestral. #' #' This allows also to read in reconstruction made by iqtree to use the #' plotting capabilities of R. diff --git a/R/plotAnc.R b/R/plotAnc.R index 370d2bde..661b53dd 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -16,6 +16,11 @@ getAncDF <- function(x){ attr(tmp, "contrast") <- contrast / rowSums(contrast) tmp <- new2old.phyDat(tmp) df <- list2df_ancestral(tmp, x$data) + if(!identical(colnames(x$prob), colnames(df))){ + if(identical(toupper(colnames(x$prob)), toupper(colnames(df)))){ + colnames(x$prob) <- colnames(df) + } else stop("Invalid prob object") + } rbind(df, x$prob) } @@ -73,13 +78,14 @@ getAncDF <- function(x){ #' @importFrom ggseqlogo make_col_scheme ggseqlogo #' @rdname plot.ancestral #' @export -plotAnc <- function(x, i = 1, col = NULL, - cex.pie = .5, pos = "bottomright", scheme=NULL, - ...) { +plotAnc <- function(x, i = 1, ..., col = NULL, type="phylogram", + cex.pie = .5, pos = "bottomright", scheme=NULL) { stopifnot(inherits(x, "ancestral")) + type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", + "radial", "tidy")) + phylo_clado <- type %in% c("phylogram", "cladogram") df <- getAncDF(x) data <- x$data - type <- attr(data, "type") tree <- x$tree subset <- df[,"Site"] == i Y <- df[subset & !is.na(subset),] @@ -98,8 +104,8 @@ plotAnc <- function(x, i = 1, col = NULL, xrad <- CEX * diff(par("usr")[1:2]) / 50 levels <- attr(data, "levels") nc <- attr(data, "nc") - if(is.null(scheme) & type=="AA") scheme <- "Ape_AA" - if(is.null(scheme) & type=="DNA") scheme <- "Ape_NT" + if(is.null(scheme) & attr(data, "type")=="AA") scheme <- "Ape_AA" + if(is.null(scheme) & attr(data, "type")=="DNA") scheme <- "Ape_NT" if(!is.null(scheme)){ scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", "Transmembrane_tendency", "Ape_NT", "RY_NT")) @@ -138,6 +144,7 @@ plotAnc <- function(x, i = 1, col = NULL, sel = seq_along(XX), thermo = NULL, piecol = col, col = "black", bg = "lightblue", horiz = FALSE, width = NULL, height = NULL, cex = cex.pie ) +# if(legend) legend(pos, legend=levels, pch=21, pt.bg = col) if (!is.null(pos)) legend(pos, legend=levels, pch=21, pt.bg = col) } diff --git a/man/densiTree.Rd b/man/densiTree.Rd index ca8b42d1..a6d88b1a 100644 --- a/man/densiTree.Rd +++ b/man/densiTree.Rd @@ -4,12 +4,12 @@ \alias{densiTree} \title{Plots a densiTree.} \usage{ -densiTree(x, type = "cladogram", alpha = 1/length(x), consensus = NULL, - direction = "rightwards", optim = FALSE, scaleX = FALSE, col = 1, - width = 1, lty = 1, cex = 0.8, font = 3, tip.color = 1, adj = 0, - srt = 0, underscore = FALSE, label.offset = 0, scale.bar = TRUE, - jitter = list(amount = 0, random = TRUE), tip.dates = NULL, - xlim = NULL, ylim = NULL, ...) +densiTree(x, type = "phylogram", ..., alpha = 1/length(x), + consensus = NULL, direction = "rightwards", optim = FALSE, + scaleX = FALSE, col = 1, width = 1, lty = 1, cex = 0.8, font = 3, + tip.color = 1, adj = 0, srt = 0, underscore = FALSE, + label.offset = 0, scale.bar = TRUE, jitter = list(amount = 0, random = + TRUE), tip.dates = NULL, xlim = NULL, ylim = NULL) } \arguments{ \item{x}{an object of class \code{multiPhylo}.} @@ -17,6 +17,8 @@ densiTree(x, type = "cladogram", alpha = 1/length(x), consensus = NULL, \item{type}{a character string specifying the type of phylogeny, so far "cladogram" (default) or "phylogram" are supported.} +\item{\dots}{further arguments to be passed to plot.} + \item{alpha}{parameter for semi-transparent colors.} \item{consensus}{A tree or character vector which is used to define the order @@ -67,8 +69,6 @@ jitter and random or equally spaced (see details below)} \item{xlim}{the x limits of the plot.} \item{ylim}{the y limits of the plot.} - -\item{\dots}{further arguments to be passed to plot.} } \value{ \code{densiTree} returns silently x. diff --git a/man/identify.networx.Rd b/man/identify.networx.Rd index 33c08eae..4e27c39e 100644 --- a/man/identify.networx.Rd +++ b/man/identify.networx.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/networx.R +% Please edit documentation in R/plot_networx.R \name{identify.networx} \alias{identify.networx} \title{Identify splits in a network} diff --git a/man/phangorn-internal.Rd b/man/phangorn-internal.Rd index e8f1f64c..1c5e438e 100644 --- a/man/phangorn-internal.Rd +++ b/man/phangorn-internal.Rd @@ -1,12 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/RcppExports.R, R/candidate_tree.R, -% R/hash_phylo.R, R/networx.R, R/phangorn-package.R, R/phyDat.R, R/pmlPen.R +% R/hash_phylo.R, R/phangorn-package.R, R/phyDat.R, R/plot_networx.R, +% R/pmlPen.R \name{threshStateC} \alias{threshStateC} \alias{candidate_tree} \alias{hash} -\alias{coords} \alias{phangorn-internal} +\alias{coords} \alias{map_duplicates} \alias{pmlPen} \title{Internal phangorn Functions} @@ -18,10 +19,10 @@ candidate_tree(x, method = c("unrooted", "ultrametric", "tipdated"), hash(x, ...) -coords(obj, dim = "3D") - map_duplicates(x, dist = length(x) < 500, ...) +coords(obj, dim = "3D") + pmlPen(object, lambda, ...) } \description{ diff --git a/man/plot.ancestral.Rd b/man/plot.ancestral.Rd index 0d553f6b..580aa1e5 100644 --- a/man/plot.ancestral.Rd +++ b/man/plot.ancestral.Rd @@ -5,8 +5,8 @@ \alias{plotSeqLogo} \title{Plot ancestral character on a tree} \usage{ -plotAnc(x, i = 1, col = NULL, cex.pie = 0.5, pos = "bottomright", - scheme = NULL, ...) +plotAnc(x, i = 1, ..., col = NULL, type = "phylogram", cex.pie = 0.5, + pos = "bottomright", scheme = NULL) plotSeqLogo(x, node = getRoot(x$tree), start = 1, end = 10, scheme = "Ape_NT", ...) @@ -16,6 +16,8 @@ plotSeqLogo(x, node = getRoot(x$tree), start = 1, end = 10, \item{i}{plots the i-th site.} +\item{\dots}{Further arguments passed to or from other methods.} + \item{col}{a vector containing the colors for all possible states.} \item{cex.pie}{a numeric defining the size of the pie graphs.} @@ -26,8 +28,6 @@ plotSeqLogo(x, node = getRoot(x$tree), start = 1, end = 10, "Zappo_AA", "Clustal", "Polarity" and "Transmembrane_tendency", for nucleotides "Ape_NT" and"RY_NT". Names can be abbreviated.} -\item{\dots}{Further arguments passed to or from other methods.} - \item{node}{to plot for which the probabilities should be plotted.} \item{start}{start position to plot.} diff --git a/man/plot.networx.Rd b/man/plot.networx.Rd index d740627c..ee14cdc5 100644 --- a/man/plot.networx.Rd +++ b/man/plot.networx.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/networx.R +% Please edit documentation in R/plot_networx.R \name{plot.networx} \alias{plot.networx} \title{plot phylogenetic networks} From a4cc7b078d9ef76ed42f0bdbe45b078f08f6afa8 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 7 Jun 2024 12:03:20 +0200 Subject: [PATCH 205/216] update man page of plotAnc --- R/plotAnc.R | 6 ++++++ man/plot.ancestral.Rd | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/R/plotAnc.R b/R/plotAnc.R index 661b53dd..7284bece 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -42,6 +42,9 @@ getAncDF <- function(x){ ## ,site ## @param which either "pie" or "seqlogo" #' @param node to plot for which the probabilities should be plotted. +#' @param type a character string specifying the type of phylogeny to be drawn; +#' it must be one of "phylogram" (the default), "cladogram", "fan", "unrooted", +#' "radial", "tidy", or any unambiguous abbreviation of these. #' @param start start position to plot. #' @param end end position to plot. #' @param col a vector containing the colors for all possible states. @@ -51,6 +54,7 @@ getAncDF <- function(x){ #' "Zappo_AA", "Clustal", "Polarity" and "Transmembrane_tendency", for #' nucleotides "Ape_NT" and"RY_NT". Names can be abbreviated. #' @param \dots Further arguments passed to or from other methods. +#' @returns \code{plotAnc} returns silently x. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}}, #' \code{\link[ape]{image.DNAbin}}, \code{\link[ape]{image.AAbin}} @@ -146,6 +150,7 @@ plotAnc <- function(x, i = 1, ..., col = NULL, type="phylogram", ) # if(legend) legend(pos, legend=levels, pch=21, pt.bg = col) if (!is.null(pos)) legend(pos, legend=levels, pch=21, pt.bg = col) + invisible(x) } @@ -171,6 +176,7 @@ my_ggseqlogo <-function (data, facet = "wrap", scales = "free_x", ncol = NULL, #' @rdname plot.ancestral #' @importFrom ggplot2 scale_x_continuous ggplot facet_grid facet_wrap #' @importFrom ggseqlogo geom_logo theme_logo +#' @returns \code{plotSeqLogo} returns a ggplot object. #' @export plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT", ...){ stopifnot(inherits(x, "ancestral")) diff --git a/man/plot.ancestral.Rd b/man/plot.ancestral.Rd index 580aa1e5..79d96efc 100644 --- a/man/plot.ancestral.Rd +++ b/man/plot.ancestral.Rd @@ -20,6 +20,10 @@ plotSeqLogo(x, node = getRoot(x$tree), start = 1, end = 10, \item{col}{a vector containing the colors for all possible states.} +\item{type}{a character string specifying the type of phylogeny to be drawn; +it must be one of "phylogram" (the default), "cladogram", "fan", "unrooted", +"radial", "tidy", or any unambiguous abbreviation of these.} + \item{cex.pie}{a numeric defining the size of the pie graphs.} \item{pos}{a character string defining the position of the legend.} @@ -34,6 +38,11 @@ nucleotides "Ape_NT" and"RY_NT". Names can be abbreviated.} \item{end}{end position to plot.} } +\value{ +\code{plotAnc} returns silently x. + +\code{plotSeqLogo} returns a ggplot object. +} \description{ \code{plotAnc} plots a phylogeny and adds character to the nodes. Either takes output from \code{ancestral.pars} or \code{ancestral.pml} or from an From 7cecbc0332e9b6bd065e4e1f1386e66207d1b93f Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Fri, 7 Jun 2024 13:55:52 +0200 Subject: [PATCH 206/216] update man page --- man/write.ancestral.Rd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/man/write.ancestral.Rd b/man/write.ancestral.Rd index a1db929a..923ee026 100644 --- a/man/write.ancestral.Rd +++ b/man/write.ancestral.Rd @@ -32,8 +32,7 @@ each state and site.} \description{ \code{write.ancestral} allows to export ancestral reconstructions. It writes out the tree, a tab delimited text file with the probabilities and the -alignment. \code{read.ancestral} tries to read in these files. -\code{ancestral} generates an object of class ancestral. +alignment. \code{ancestral} generates an object of class ancestral. } \details{ This allows also to read in reconstruction made by iqtree to use the From 2a046ae6e3e8802dd866c3041cf94e674a5f7c11 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 11 Jun 2024 11:26:52 +0200 Subject: [PATCH 207/216] Add lt2amb the equivalent to latag2n in ape. Generic in future? --- R/lt2amb.R | 44 ++++++++++++++++++++++++++++++++++++++++++++ man/ltg2amb.Rd | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 R/lt2amb.R create mode 100644 man/ltg2amb.Rd diff --git a/R/lt2amb.R b/R/lt2amb.R new file mode 100644 index 00000000..e93a3e92 --- /dev/null +++ b/R/lt2amb.R @@ -0,0 +1,44 @@ +#' Replace leading and trailing alignment gaps with an ambiguous state +#' +#' Substitutes leading and trailing alignment gaps in aligned sequences into N +#' (i.e., A, C, G, or T) or ?. The gaps in the middle of the sequences are left +#' unchanged. +#' @param x an object of class \code{phyDat}. +#' @param amb character of the ambiguous state t replace the gaps. +#' @param gap gap parameter to replace. +#' @returns returns an object of class \code{phyDat}. +#' @seealso \code{\link{latag2n}} +#' @examples +#' x <- phyDat(matrix(c("-", "A", "G", "-", "T", "C"), 2, 3)) +#' y <- ltg2amb(x) +#' image(x) +#' image(y) +#' @keywords cluster +#' @export +ltg2amb <- function(x, amb=ifelse(attr(x,"type")=="DNA", "N", "?"), gap="-"){ + X <- as.character(x) + d <- dim(X) + for(i in seq_len(d[1])){ + # from start + j <- 1 + while(X[i,j]==gap){ + X[i,j] <- amb + j <- j+1 + if(j>d[2])break() + } + j <- d[2] + while(X[i,j]==gap){ + X[i,j] <- amb + j = j-1 + if(j<1)break() + } + } + type <- attr(x, "type") + if(type %in% c("DNA", "AA", "CODON")){ + y <- phyDat(X, type=type) + } else y <- phyDat(X, type="USER", levels = attr(x, "levels")) + y +} +# same as: +# dna2 <- as.DNAbin(dna) |> latag2n() |> as.phyDat() +# but works for AA diff --git a/man/ltg2amb.Rd b/man/ltg2amb.Rd new file mode 100644 index 00000000..49efa394 --- /dev/null +++ b/man/ltg2amb.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lt2amb.R +\name{ltg2amb} +\alias{ltg2amb} +\title{Replace leading and trailing alignment gaps with an ambiguous state} +\usage{ +ltg2amb(x, amb = ifelse(attr(x, "type") == "DNA", "N", "?"), gap = "-") +} +\arguments{ +\item{x}{an object of class \code{phyDat}.} + +\item{amb}{character of the ambiguous state t replace the gaps.} + +\item{gap}{gap parameter to replace.} +} +\value{ +returns an object of class \code{phyDat}. +} +\description{ +Substitutes leading and trailing alignment gaps in aligned sequences into N +(i.e., A, C, G, or T) or ?. The gaps in the middle of the sequences are left +unchanged. +} +\examples{ +x <- phyDat(matrix(c("-", "A", "G", "-", "T", "C"), 2, 3)) +y <- ltg2amb(x) +image(x) +image(y) +} +\seealso{ +\code{\link{latag2n}} +} +\keyword{cluster} From 5ae576146db9b6a26c28917a3e002f0dae3d4d52 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 11 Jun 2024 11:34:47 +0200 Subject: [PATCH 208/216] add experimental joint reconstruction, based on Liam's phytools code. --- R/joint_ASR.R | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 R/joint_ASR.R diff --git a/R/joint_ASR.R b/R/joint_ASR.R new file mode 100644 index 00000000..b7ec6d54 --- /dev/null +++ b/R/joint_ASR.R @@ -0,0 +1,73 @@ +# Joint reconstruction +joint_pml <- function(x){ + stopifnot(inherits(x, "pml")) + if(x$k > 1 || x$inv>0) stop("One one rate allowed so far!") + data <- x$data + eig <- x$eig + contrast <- attr(data, "contrast") + tree <- x$tree + tree <- reorder(tree, "postorder") + edge <- tree$edge + ntip <- Ntip(tree) + desc <- Descendants(tree, type="children") + el <- numeric(max(tree$edge)) + el[tree$edge[,2]] <- tree$edge.length + P <- getP(el * x$rate, eig = eig) + nr <- attr(data, "nr") + nc <- attr(data, "nc") + levels <- attr(data, "levels") + allLevels <- attr(data, "allLevels") + l <- length(tree$edge.length) + + L <- C <- array(NA, c(nr, nc, max(tree$edge))) + for(i in seq_len(Ntip(tree))){ + L[,,i]<- log(contrast[data[[i]],,drop=FALSE]%*%P[[i]]) + } + nn <- unique(edge[,1]) + pa <- 0 + root <- edge[l, 1] + lnr <- seq_len(nr) + for(i in seq_len(length(nn))){ + pa <- nn[i] + ch <- desc[[pa]] + P_i <- P[[pa]] + tmp1 <- tmp2 <- matrix(0, nr, nc) + for(j in seq_along(ch)){ + tmp1 <- tmp1 + L[,,ch[j]] + } + if(pa==root) break() + for(j in 1:nc){ + pp <- tmp1 + rep(log(P_i[j,]), each=nr) + pos <- max.col(pp) + L[,j,pa]<- pp[ cbind(lnr, pos)] + C[,j,pa] <- pos + } + } + pp <- tmp1 + rep(log(x$bf), each=nr) + pos <- max.col(pp) + L[,,pa] <- pp + C[,,pa] <- pos + tree <- reorder(tree) + if(is.null(tree$node.label)) tree <- makeNodeLabel(tree) + res <- vector("list", length(tree$node.label)) + names(res) <- tree$node.label + res[[1]] <- pos + att <- attributes(data) + att$names <- tree$node.label + labels <- c(tree$tip.label, tree$node.label) + edge <- tree$edge + nrw <- seq_len(nr) + for(i in seq_along(edge[,1])){ + ch_i <- edge[i,2] + pa_i <- edge[i,1] + if(ch_i > ntip){ + pos <-res[[labels[pa_i]]] + res[[labels[ch_i]]] <- C[nrw,pos,ch_i] + } + } + ind <- match(levels, allLevels) + for(i in length(res)) res[[i]] <- ind[res[[i]]] + attributes(res) <- att + res +} + From b1927f737b9a069ef8e83819494323595f87e1f8 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 11 Jun 2024 11:50:08 +0200 Subject: [PATCH 209/216] update NEWS --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index e84efdd2..afb5fb29 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -173,6 +173,7 @@ export(keep_as_tip) export(ldfactorial) export(lento) export(lli) +export(ltg2amb) export(map_duplicates) export(mast) export(matchSplits) From ddbed492872f3b031d18e74e5c1e26a2ab8266d1 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 11 Jun 2024 12:03:02 +0200 Subject: [PATCH 210/216] add some links --- R/ancestral.R | 3 ++- R/lt2amb.R | 3 ++- man/ancestral.pml.Rd | 3 ++- man/ltg2amb.Rd | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/ancestral.R b/R/ancestral.R index 77606c01..a2fb8873 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -42,7 +42,8 @@ ## returned. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, -#' \code{\link{plotAnc}}, \code{\link[ape]{root}}, +#' \code{\link{plotAnc}}, \code{\link{lt2amb}}, \code{\link{latag2n}}, +#' \code{\link{gap_as_state}}, \code{\link[ape]{root}}, #' \code{\link[ape]{makeNodeLabel}} #' @references Felsenstein, J. (2004). \emph{Inferring Phylogenies}. Sinauer #' Associates, Sunderland. diff --git a/R/lt2amb.R b/R/lt2amb.R index e93a3e92..09c5aa1a 100644 --- a/R/lt2amb.R +++ b/R/lt2amb.R @@ -7,7 +7,8 @@ #' @param amb character of the ambiguous state t replace the gaps. #' @param gap gap parameter to replace. #' @returns returns an object of class \code{phyDat}. -#' @seealso \code{\link{latag2n}} +#' @seealso \code{\link{latag2n}}, \code{\link{ancestral.pml}}, +#' \code{\link{gap_as_state}} #' @examples #' x <- phyDat(matrix(c("-", "A", "G", "-", "T", "C"), 2, 3)) #' y <- ltg2amb(x) diff --git a/man/ancestral.pml.Rd b/man/ancestral.pml.Rd index 353c45ea..06fa0dba 100644 --- a/man/ancestral.pml.Rd +++ b/man/ancestral.pml.Rd @@ -92,7 +92,8 @@ Press, Oxford. } \seealso{ \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, -\code{\link{plotAnc}}, \code{\link[ape]{root}}, +\code{\link{plotAnc}}, \code{\link{lt2amb}}, \code{\link{latag2n}}, +\code{\link{gap_as_state}}, \code{\link[ape]{root}}, \code{\link[ape]{makeNodeLabel}} } \author{ diff --git a/man/ltg2amb.Rd b/man/ltg2amb.Rd index 49efa394..f6e68f24 100644 --- a/man/ltg2amb.Rd +++ b/man/ltg2amb.Rd @@ -28,6 +28,7 @@ image(x) image(y) } \seealso{ -\code{\link{latag2n}} +\code{\link{latag2n}}, \code{\link{ancestral.pml}}, +\code{\link{gap_as_state}} } \keyword{cluster} From 5086adc366f37547d7b1a75979372139224625d9 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 11 Jun 2024 12:41:45 +0200 Subject: [PATCH 211/216] fix spelling --- R/ancestral.R | 2 +- man/ancestral.pml.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ancestral.R b/R/ancestral.R index a2fb8873..0d891da4 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -42,7 +42,7 @@ ## returned. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, -#' \code{\link{plotAnc}}, \code{\link{lt2amb}}, \code{\link{latag2n}}, +#' \code{\link{plotAnc}}, \code{\link{ltg2amb}}, \code{\link{latag2n}}, #' \code{\link{gap_as_state}}, \code{\link[ape]{root}}, #' \code{\link[ape]{makeNodeLabel}} #' @references Felsenstein, J. (2004). \emph{Inferring Phylogenies}. Sinauer diff --git a/man/ancestral.pml.Rd b/man/ancestral.pml.Rd index 06fa0dba..b84f3a60 100644 --- a/man/ancestral.pml.Rd +++ b/man/ancestral.pml.Rd @@ -92,7 +92,7 @@ Press, Oxford. } \seealso{ \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, -\code{\link{plotAnc}}, \code{\link{lt2amb}}, \code{\link{latag2n}}, +\code{\link{plotAnc}}, \code{\link{ltg2amb}}, \code{\link{latag2n}}, \code{\link{gap_as_state}}, \code{\link[ape]{root}}, \code{\link[ape]{makeNodeLabel}} } From 971d9814e30895051971c24303ca9a5bdc2a06a9 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Sun, 16 Jun 2024 18:07:01 +0200 Subject: [PATCH 212/216] small improvements --- R/gap_as_state.R | 3 ++- R/plotAnc.R | 6 +++--- R/treeManipulation.R | 10 ++++++---- man/allTrees.Rd | 7 ++++--- man/gap_as_state.Rd | 3 ++- man/plot.ancestral.Rd | 10 +++++----- 6 files changed, 22 insertions(+), 17 deletions(-) diff --git a/R/gap_as_state.R b/R/gap_as_state.R index f71fdb91..f8c199ce 100644 --- a/R/gap_as_state.R +++ b/R/gap_as_state.R @@ -10,7 +10,8 @@ #' @param ambiguous a character which codes for the ambiguous state #' @return The functions return an object of class \code{phyDat}. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{phyDat}} +#' @seealso \code{\link{phyDat}}, \code{\link{lt2amb}}, \code{\link{latag2n}}, +#' \code{\link{ancestral.pml}}, \code{\link{gap_as_state}} #' @keywords cluster #' @examples #' data(Laurasiatherian) diff --git a/R/plotAnc.R b/R/plotAnc.R index 7284bece..327aa077 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -82,7 +82,7 @@ getAncDF <- function(x){ #' @importFrom ggseqlogo make_col_scheme ggseqlogo #' @rdname plot.ancestral #' @export -plotAnc <- function(x, i = 1, ..., col = NULL, type="phylogram", +plotAnc <- function(x, i = 1, type="phylogram", ..., col = NULL, cex.pie = .5, pos = "bottomright", scheme=NULL) { stopifnot(inherits(x, "ancestral")) type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", @@ -131,13 +131,13 @@ plotAnc <- function(x, i = 1, ..., col = NULL, type="phylogram", col <- sc$col nc <- ncol(y) } - plot(tree, label.offset = 1.1 * xrad, plot = FALSE, ...) + plot(tree, label.offset = 1.1 * xrad, plot = FALSE, type=type, ...) lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) XX <- lastPP$xx YY <- lastPP$yy xrad <- CEX * diff(lastPP$x.lim * 1.1) / 50 par(new = TRUE) - plot(tree, label.offset = 1.1 * xrad, plot = TRUE, ...) + plot(tree, label.offset = 1.1 * xrad, plot = TRUE, type=type, ...) if (is.null(col)) col <- hcl.colors(nc) #rainbow(nc) if(!is.null(names(col))) col <- col[attr(data, "levels")] if (length(col) != nc) { diff --git a/R/treeManipulation.R b/R/treeManipulation.R index 62f09eb0..fd0e4837 100644 --- a/R/treeManipulation.R +++ b/R/treeManipulation.R @@ -512,8 +512,8 @@ add.tips <- function(tree, tips, where, edge.length = NULL) { #' Compute all trees topologies. #' -#' \code{allTrees} computes all tree topologies for rooted or unrooted trees -#' with up to 10 tips. \code{allTrees} returns bifurcating trees. +#' \code{allTrees} computes all bifurcating tree topologies for rooted or unrooted +#' trees with up to 10 tips. The number of trees grows fast #' #' #' @param n Number of tips (<=10). @@ -521,7 +521,8 @@ add.tips <- function(tree, tips, where, edge.length = NULL) { #' @param tip.label Tip labels. #' @return an object of class \code{multiPhylo}. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link[ape]{rtree}}, \code{\link{nni}} +#' @seealso \code{\link[ape]{rtree}}, \code{\link{nni}}, +#' \code{\link[ape]{howmanytrees}}, \code{\link{dfactorial}} #' @keywords cluster #' @examples #' @@ -536,6 +537,7 @@ add.tips <- function(tree, tips, where, edge.length = NULL) { allTrees <- function(n, rooted = FALSE, tip.label = NULL) { n <- as.integer(n) nt <- as.integer(round(dfactorial(2 * (n + rooted) - 5))) + Nnode <- as.integer(n - 2L + rooted) if ( (n + rooted) > 10) { stop(gettextf("That would generate %d trees, and take up more than %d MB of memory!", nt, as.integer(round(nt / 1000)), domain = "R-phangorn")) @@ -606,7 +608,7 @@ allTrees <- function(n, rooted = FALSE, tip.label = NULL) { edge <- edges[[x]] edge <- edge[reorderRcpp(edge, n, n + 1L, 2L), ] tree <- list(edge = edge) - tree$Nnode <- as.integer(n - 2L + rooted) + tree$Nnode <- Nnode attr(tree, "order") <- "postorder" class(tree) <- "phylo" trees[[x]] <- tree diff --git a/man/allTrees.Rd b/man/allTrees.Rd index 4e45a551..d235c3f0 100644 --- a/man/allTrees.Rd +++ b/man/allTrees.Rd @@ -17,8 +17,8 @@ allTrees(n, rooted = FALSE, tip.label = NULL) an object of class \code{multiPhylo}. } \description{ -\code{allTrees} computes all tree topologies for rooted or unrooted trees -with up to 10 tips. \code{allTrees} returns bifurcating trees. +\code{allTrees} computes all bifurcating tree topologies for rooted or unrooted +trees with up to 10 tips. The number of trees grows fast } \examples{ @@ -31,7 +31,8 @@ par(old.par) } \seealso{ -\code{\link[ape]{rtree}}, \code{\link{nni}} +\code{\link[ape]{rtree}}, \code{\link{nni}}, +\code{\link[ape]{howmanytrees}}, \code{\link{dfactorial}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/gap_as_state.Rd b/man/gap_as_state.Rd index 554fb274..b2347a88 100644 --- a/man/gap_as_state.Rd +++ b/man/gap_as_state.Rd @@ -36,7 +36,8 @@ rownames(contr) <- attr(tmp, "allLevels") contr } \seealso{ -\code{\link{phyDat}} +\code{\link{phyDat}}, \code{\link{lt2amb}}, \code{\link{latag2n}}, +\code{\link{ancestral.pml}}, \code{\link{gap_as_state}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/plot.ancestral.Rd b/man/plot.ancestral.Rd index 79d96efc..e6a5bcf2 100644 --- a/man/plot.ancestral.Rd +++ b/man/plot.ancestral.Rd @@ -5,7 +5,7 @@ \alias{plotSeqLogo} \title{Plot ancestral character on a tree} \usage{ -plotAnc(x, i = 1, ..., col = NULL, type = "phylogram", cex.pie = 0.5, +plotAnc(x, i = 1, type = "phylogram", ..., col = NULL, cex.pie = 0.5, pos = "bottomright", scheme = NULL) plotSeqLogo(x, node = getRoot(x$tree), start = 1, end = 10, @@ -16,14 +16,14 @@ plotSeqLogo(x, node = getRoot(x$tree), start = 1, end = 10, \item{i}{plots the i-th site.} -\item{\dots}{Further arguments passed to or from other methods.} - -\item{col}{a vector containing the colors for all possible states.} - \item{type}{a character string specifying the type of phylogeny to be drawn; it must be one of "phylogram" (the default), "cladogram", "fan", "unrooted", "radial", "tidy", or any unambiguous abbreviation of these.} +\item{\dots}{Further arguments passed to or from other methods.} + +\item{col}{a vector containing the colors for all possible states.} + \item{cex.pie}{a numeric defining the size of the pie graphs.} \item{pos}{a character string defining the position of the legend.} From 17ce4c1edbb670e9d85b4b33d34df0011bb9b7c4 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 17 Jun 2024 12:37:24 +0200 Subject: [PATCH 213/216] bugfix --- R/ancestral.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/ancestral.R b/R/ancestral.R index 0d891da4..ce2a45d9 100644 --- a/R/ancestral.R +++ b/R/ancestral.R @@ -224,9 +224,11 @@ ancestral <- function(tree, align, prob){ stopifnot(inherits(tree, "phylo")) stopifnot(inherits(align, "phyDat")) stopifnot(inherits(prob, "data.frame")) + if(is.null(tree$node.label))stop("tree needs node.label") state <- extract_states(prob, attr(align, "type"), levels=attr(align, "levels")) - erg <- list(tree=tree, data=align, prob=prob, state=state) + erg <- list(tree=tree, data=align[tree$tip.label], prob=prob, + state=state[tree$node.label]) class(erg) <- "ancestral" erg } From 3b52c30a8cce6e9cf55611a36ad536a66eea1751 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 17 Jun 2024 12:47:06 +0200 Subject: [PATCH 214/216] bugfix --- R/plotAnc.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/plotAnc.R b/R/plotAnc.R index 327aa077..9ea7fca4 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -98,6 +98,7 @@ plotAnc <- function(x, i = 1, type="phylogram", ..., col = NULL, # y <- y[, -c(1:3)] colnames(y) <- gsub("p_", "", colnames(y)) row.names(y) <- Y$Node + y <- y[c(tree$tip.label, tree$node.label), ] if(is.null(tree$node.label) || any(is.na(match(tree$node.label, rownames(y)))) || is.numeric(tree$node.label)) tree <- makeNodeLabel(tree) From ea8ca9b99e02044ce1705d62d59174016986cd09 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Mon, 17 Jun 2024 17:48:23 +0200 Subject: [PATCH 215/216] bugfix --- R/joint_ASR.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/joint_ASR.R b/R/joint_ASR.R index b7ec6d54..e279b46d 100644 --- a/R/joint_ASR.R +++ b/R/joint_ASR.R @@ -18,7 +18,6 @@ joint_pml <- function(x){ levels <- attr(data, "levels") allLevels <- attr(data, "allLevels") l <- length(tree$edge.length) - L <- C <- array(NA, c(nr, nc, max(tree$edge))) for(i in seq_len(Ntip(tree))){ L[,,i]<- log(contrast[data[[i]],,drop=FALSE]%*%P[[i]]) @@ -62,7 +61,7 @@ joint_pml <- function(x){ pa_i <- edge[i,1] if(ch_i > ntip){ pos <-res[[labels[pa_i]]] - res[[labels[ch_i]]] <- C[nrw,pos,ch_i] + res[[labels[ch_i]]] <- C[cbind(nrw,pos,ch_i)] } } ind <- match(levels, allLevels) From 5c33b58cdb383f1ddc457819798881e55a989698 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Wed, 19 Jun 2024 16:30:31 +0200 Subject: [PATCH 216/216] fix wrong links --- R/gap_as_state.R | 2 +- R/{lt2amb.R => ltg2amb.R} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename R/{lt2amb.R => ltg2amb.R} (100%) diff --git a/R/gap_as_state.R b/R/gap_as_state.R index f8c199ce..c6917a11 100644 --- a/R/gap_as_state.R +++ b/R/gap_as_state.R @@ -10,7 +10,7 @@ #' @param ambiguous a character which codes for the ambiguous state #' @return The functions return an object of class \code{phyDat}. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{phyDat}}, \code{\link{lt2amb}}, \code{\link{latag2n}}, +#' @seealso \code{\link{phyDat}}, \code{\link{ltg2amb}}, \code{\link{latag2n}}, #' \code{\link{ancestral.pml}}, \code{\link{gap_as_state}} #' @keywords cluster #' @examples diff --git a/R/lt2amb.R b/R/ltg2amb.R similarity index 100% rename from R/lt2amb.R rename to R/ltg2amb.R