From 4adcbda9c1e943c92f1b9a1f1a59965eccb83846 Mon Sep 17 00:00:00 2001 From: Klaus Schliep Date: Tue, 24 Dec 2024 12:10:08 +0100 Subject: [PATCH] small improvements --- .Rbuildignore | 1 + CONTRIBUTING.md | 3 ++- NAMESPACE | 1 + R/distTree.R | 5 +---- R/fitch64.R | 2 +- R/joint_ASR.R | 36 +++++++++++++++++++++++++++--------- R/plotAnc.R | 2 +- man/designTree.Rd | 2 +- man/parsimony_edgelength.Rd | 24 ++++++++++++++++++++++++ man/plot.ancestral.Rd | 2 +- 10 files changed, 60 insertions(+), 18 deletions(-) create mode 100644 man/parsimony_edgelength.Rd diff --git a/.Rbuildignore b/.Rbuildignore index 5d3a89f0..6e3409c0 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -19,3 +19,4 @@ ^codemeta\.json$ ^codecov\.yml$ ^\.out.R +^CONTRIBUTING.md$ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7469f2d0..c31debad 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -12,10 +12,11 @@ phangorn is an open source project, maintained by people who care. We are not di [website]: https://KlausVigo.github.io/phangorn [citation]: https://KlausVigo.github.io/phangorn/authors.html [email]: mailto:klaus.schliep@gmail.com - + ## How you can contribute diff --git a/NAMESPACE b/NAMESPACE index 49763310..54a1de4b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -198,6 +198,7 @@ export(optim.parsimony) export(optim.pml) export(pace) export(parsimony) +export(parsimony_edgelength) export(path.dist) export(phyDat) export(phyDat2MultipleAlignment) diff --git a/R/distTree.R b/R/distTree.R index 7d2e65c2..fb570362 100644 --- a/R/distTree.R +++ b/R/distTree.R @@ -305,9 +305,6 @@ designUnrooted2 <- function(tree, sparse = TRUE) { designTipDated <- function(tree, tip.dates, sparse=TRUE){ - #, strict=TRUE - #if(!is.numeric(tip.dates)) browser() - #if(!length(tip.dates) >= Ntip(tree)) browser() stopifnot(is.numeric(tip.dates), length(tip.dates) >= Ntip(tree)) nTip <- Ntip(tree) tmp <- function(n){ @@ -367,7 +364,7 @@ designConstrained <- function(tree, sparse=TRUE, tip.dates=NULL, #' @export nnls.tree <- function(dm, tree, method=c("unrooted", "ultrametric", "tipdated"), rooted=NULL, trace=1, weight=NULL, balanced=FALSE, tip.dates=NULL, - constraint=NULL) { + calibration=NULL) { method <- match.arg(method, c("unrooted", "ultrametric", "tipdated")) if(has.singles(tree)) tree <- collapse.singles(tree) if (is.rooted(tree) && method == "unrooted") tree <- unroot(tree) diff --git a/R/fitch64.R b/R/fitch64.R index 9954b5fe..779c5469 100644 --- a/R/fitch64.R +++ b/R/fitch64.R @@ -89,7 +89,7 @@ random.addition <- function (data, tree=NULL, method = "fitch") tree$edge <- edge remaining <- sample(setdiff(label, tree$tip.label)) tree$tip.label <- c(tree$tip.label, remaining) - tree <- relabel(tree, label) + tree <- checkLabels(tree, label) remaining <- match(remaining, label) } diff --git a/R/joint_ASR.R b/R/joint_ASR.R index cac60932..740f4eaf 100644 --- a/R/joint_ASR.R +++ b/R/joint_ASR.R @@ -139,26 +139,44 @@ joint_sankoff <- function(tree, data, cost=NULL){ } -# alternative to acctran(tree, data) +#' Assign edge length to tree +#' +#' \code{parsimony_edgelength} and \code{acctran} assign edge length to a tree where +#' the edge length is the number of mutations. \code{parsimony_edgelengths} +#' assigns edge lengths using a joint reconstruction based on the sankoff +#' algorithm. Ties are broken at random and trees can be multifurating. +#' \code{acctran} is based on the fitch algorithm and is faster. However trees +#' need to be bifurcating and ties are split. +#' @param tree a tree, i.e. an object of class pml +#' @param data an object of class phyDat +#' @return a tree with edge length. +#' @export +parsimony_edgelength <- function(tree, data){ + if(inherits(tree, "phylo")) return(count_mutations(tree, data)) + if(inherits(tree, "multiPhylo")) { + res <- lapply(tree, count_mutations, data=data) + class(res) <- "multiPhylo" + return(res) + } + NULL +} + + count_mutations <- function(tree, data){ - site <- "pscore" tree <- reorder(tree, "postorder") data <- data[tree$tip.label] tree_tmp <- makeNodeLabel(tree) anc <- joint_sankoff(tree_tmp, data) +# ind <- length(data)+seq_along(anc) +# data[ind] <- anc +# names(dat[ind]) <- names(anc) dat <- rbind(data, anc) - nr <- attr(data, "nr") l <- length(dat) - fun <- function(x, site="pscore", nr){ - if(site=="pscore") return(f$pscore(x)) - sites <- f$sitewise_pscore(x) - sites[seq_len(nr)] - } f <- init_fitch(dat, FALSE, FALSE, m=2L) el <- numeric(nrow(tree$edge)) for(i in seq_along(el)){ edge_i <- matrix(c(l+1L, l+1L, tree$edge[i,]), 2, 2) - el[i] <- fun(edge_i, site, nr) + el[i] <- f$pscore(edge_i) } tree$edge.length <- el tree diff --git a/R/plotAnc.R b/R/plotAnc.R index 265eed3a..02280a34 100644 --- a/R/plotAnc.R +++ b/R/plotAnc.R @@ -41,7 +41,7 @@ getTransition <- function(scheme, levels){ #' @param \dots Further arguments passed to or from other methods. #' @returns \code{plotAnc} returns silently x. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}}, +#' @seealso \code{\link{anc_pml}}, \code{\link[ape]{plot.phylo}}, #' \code{\link[ape]{image.DNAbin}}, \code{\link[ape]{image.AAbin}} #' \code{\link[ggseqlogo]{ggseqlogo}}, \code{\link[ape]{edgelabels}} #' @keywords plot diff --git a/man/designTree.Rd b/man/designTree.Rd index 30a62297..98b40c74 100644 --- a/man/designTree.Rd +++ b/man/designTree.Rd @@ -14,7 +14,7 @@ designTree(tree, method = "unrooted", sparse = FALSE, tip.dates = NULL, nnls.tree(dm, tree, method = c("unrooted", "ultrametric", "tipdated"), rooted = NULL, trace = 1, weight = NULL, balanced = FALSE, - tip.dates = NULL) + tip.dates = NULL, calibration = NULL) nnls.phylo(x, dm, method = "unrooted", trace = 0, ...) diff --git a/man/parsimony_edgelength.Rd b/man/parsimony_edgelength.Rd new file mode 100644 index 00000000..89067657 --- /dev/null +++ b/man/parsimony_edgelength.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/joint_ASR.R +\name{parsimony_edgelength} +\alias{parsimony_edgelength} +\title{Assign edge length to tree} +\usage{ +parsimony_edgelength(tree, data) +} +\arguments{ +\item{tree}{a tree, i.e. an object of class pml} + +\item{data}{an object of class phyDat} +} +\value{ +a tree with edge length. +} +\description{ +\code{parsimony_edgelength} and \code{acctran} assign edge length to a tree where +the edge length is the number of mutations. \code{parsimony_edgelengths} +assigns edge lengths using a joint reconstruction based on the sankoff +algorithm. Ties are broken at random and trees can be multifurating. +\code{acctran} is based on the fitch algorithm and is faster. However trees +need to be bifurcating and ties are split. +} diff --git a/man/plot.ancestral.Rd b/man/plot.ancestral.Rd index eff6544c..d23f18f5 100644 --- a/man/plot.ancestral.Rd +++ b/man/plot.ancestral.Rd @@ -91,7 +91,7 @@ add_mutations(anc_aa, adj = c(.5, -0.3), col=2) } \seealso{ -\code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}}, +\code{\link{anc_pml}}, \code{\link[ape]{plot.phylo}}, \code{\link[ape]{image.DNAbin}}, \code{\link[ape]{image.AAbin}} \code{\link[ggseqlogo]{ggseqlogo}}, \code{\link[ape]{edgelabels}} }