diff --git a/.Rbuildignore b/.Rbuildignore index ac50ef3d..07bb9909 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -16,3 +16,5 @@ ^vignettes/MLbyHand_cache$ ^vignettes/AdvancedFeatures_cache$ ^vignettes/logo.png +^codemeta\.json$ +^codecov\.yml$ diff --git a/.github/workflows/recheck.yml b/.github/workflows/recheck.yml new file mode 100644 index 00000000..323d2ad3 --- /dev/null +++ b/.github/workflows/recheck.yml @@ -0,0 +1,18 @@ +on: + workflow_dispatch: + inputs: + which: + type: choice + description: Which dependents to check + options: + - strong + - most + +name: Reverse dependency check + +jobs: + revdep_check: + name: Reverse check ${{ inputs.which }} dependents + uses: r-devel/recheck/.github/workflows/recheck.yml@v1 + with: + which: ${{ inputs.which }} diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 2c5bb502..fefc52e2 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -8,6 +8,8 @@ on: name: test-coverage +permissions: read-all + jobs: test-coverage: runs-on: ubuntu-latest @@ -15,7 +17,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: @@ -23,28 +25,37 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::covr + extra-packages: any::covr, any::xml2 needs: coverage - name: Test coverage run: | - covr::codecov( + cov <- covr::package_coverage( quiet = FALSE, clean = FALSE, - install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") ) + covr::to_cobertura(cov) shell: Rscript {0} + - uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + - name: Show testthat output if: always() run: | ## -------------------------------------------------------------------- - find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true shell: bash - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package diff --git a/DESCRIPTION b/DESCRIPTION index 687898f6..b364e172 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: phangorn Title: Phylogenetic Reconstruction and Analysis -Version: 2.12.0.900 +Version: 3.0.0.0 Authors@R: c(person("Klaus", "Schliep", role = c("aut", "cre"), email = "klaus.schliep@gmail.com", @@ -38,12 +38,14 @@ URL: https://github.com/KlausVigo/phangorn, https://klausvigo.github.io/phangorn/ BugReports: https://github.com/KlausVigo/phangorn/issues Depends: - ape (>= 5.7), + ape (>= 5.8), R (>= 4.1.0) Imports: digest, fastmatch, generics, + ggseqlogo, + ggplot2, graphics, grDevices, igraph (>= 1.0), @@ -62,12 +64,12 @@ Suggests: rgl, rmarkdown, seqinr, - seqLogo, + testthat (>= 3.0.0), tinytest, + vdiffr, xtable LinkingTo: - Rcpp, RcppArmadillo -Remotes: github::emmanuelparadis/ape + Rcpp VignetteBuilder: knitr, utils @@ -75,5 +77,6 @@ biocViews: Software, Technology, QualityControl Encoding: UTF-8 Repository: CRAN Roxygen: list(old_usage = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Language: en-US +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 2a772a16..afb5fb29 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,9 +19,13 @@ S3method(as.data.frame,phyDat) S3method(as.matrix,splits) S3method(as.networx,phylo) S3method(as.networx,splits) +S3method(as.phyDat,AAStringSet) +S3method(as.phyDat,AAbin) +S3method(as.phyDat,DNAStringSet) S3method(as.phyDat,DNAbin) S3method(as.phyDat,MultipleAlignment) S3method(as.phyDat,alignment) +S3method(as.phyDat,ancestral) S3method(as.phyDat,character) S3method(as.phyDat,data.frame) S3method(as.phyDat,factor) @@ -40,6 +44,8 @@ S3method(cbind,phyDat) S3method(cophenetic,networx) S3method(cophenetic,splits) S3method(glance,phyDat) +S3method(glance,pml) +S3method(glance,pmlMix) S3method(hash,multiPhylo) S3method(hash,phylo) S3method(identify,networx) @@ -55,6 +61,7 @@ S3method(plot,pml) S3method(plot,pmlCluster) S3method(plot,pmlPart) S3method(print,SOWH) +S3method(print,ancestral) S3method(print,codonTest) S3method(print,phyDat) S3method(print,pml) @@ -97,12 +104,14 @@ export(addTrivialSplits) export(add_boxplot) export(add_ci) export(add_edge_length) +export(add_support) export(allCircularSplits) export(allCompat) export(allDescendants) export(allSitePattern) export(allSplits) export(allTrees) +export(ancestral) export(ancestral.pars) export(ancestral.pml) export(as.Matrix) @@ -123,6 +132,7 @@ export(coalSpeciesTree) export(codon2dna) export(codonTest) export(compatible) +export(composition_test) export(consensusNet) export(coords) export(createLabel) @@ -140,10 +150,13 @@ export(dist.p) export(distanceHadamard) export(distinct.splits) export(diversity) +export(dna2aa) export(dna2codon) export(edQt) export(fhm) export(fitch) +export(gap_as_ambiguous) +export(gap_as_state) export(genlight2phyDat) export(getClans) export(getClips) @@ -154,10 +167,13 @@ export(glance) export(h2st) export(h4st) export(hadamard) +export(has_gap_state) export(hash) +export(keep_as_tip) export(ldfactorial) export(lento) export(lli) +export(ltg2amb) export(map_duplicates) export(mast) export(matchSplits) @@ -184,6 +200,7 @@ export(phyDat2alignment) export(plotAnc) export(plotBS) export(plotRates) +export(plotSeqLogo) export(plot_gamma_plus_inv) export(pml) export(pml.control) @@ -203,7 +220,6 @@ export(rNNI) export(rSPR) export(random.addition) export(ratchet.control) -export(read.aa) export(read.nexus.dist) export(read.nexus.networx) export(read.nexus.partitions) @@ -218,6 +234,7 @@ export(simSeq) export(splitsNetwork) export(sprdist) export(superTree) +export(supgma) export(threshStateC) export(tidy) export(transferBootstrap) @@ -225,10 +242,12 @@ export(treedist) export(upgma) export(wRF.dist) export(wpgma) +export(write.ancestral) export(write.nexus.dist) export(write.nexus.networx) export(write.nexus.splits) export(write.phyDat) +export(write.pml) export(write.splits) export(writeDist) import(Rcpp) @@ -243,6 +262,14 @@ importFrom(Matrix,sparseMatrix) importFrom(fastmatch,fmatch) importFrom(generics,glance) importFrom(generics,tidy) +importFrom(ggplot2,facet_grid) +importFrom(ggplot2,facet_wrap) +importFrom(ggplot2,ggplot) +importFrom(ggplot2,scale_x_continuous) +importFrom(ggseqlogo,geom_logo) +importFrom(ggseqlogo,ggseqlogo) +importFrom(ggseqlogo,make_col_scheme) +importFrom(ggseqlogo,theme_logo) importFrom(grDevices,adjustcolor) importFrom(grDevices,col2rgb) importFrom(grDevices,hcl.colors) @@ -274,9 +301,9 @@ importFrom(graphics,title) importFrom(igraph,E) importFrom(igraph,all_shortest_paths) importFrom(igraph,decompose) -importFrom(igraph,graph) importFrom(igraph,graph_from_adjacency_matrix) importFrom(igraph,layout_nicely) +importFrom(igraph,make_graph) importFrom(igraph,shortest_paths) importFrom(igraph,topo_sort) importFrom(igraph,vcount) @@ -286,8 +313,12 @@ importFrom(stats,AIC) importFrom(stats,BIC) importFrom(stats,aggregate) importFrom(stats,as.dist) +importFrom(stats,binomial) +importFrom(stats,chisq.test) importFrom(stats,constrOptim) importFrom(stats,cophenetic) +importFrom(stats,cor) +importFrom(stats,dbinom) importFrom(stats,dgamma) importFrom(stats,ecdf) importFrom(stats,hclust) @@ -313,6 +344,7 @@ importFrom(stats,update) importFrom(stats,xtabs) importFrom(utils,combn) importFrom(utils,download.file) +importFrom(utils,head) importFrom(utils,installed.packages) importFrom(utils,packageDescription) importFrom(utils,read.table) diff --git a/NEWS b/NEWS index 19bb624f..9416a4ee 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,45 @@ - CHANGES in PHANGORN VERSION 2.12.0 + CHANGES in PHANGORN VERSION 3.0.0 + +NEW FEATURES + + o new function supgma, performing serial sampled UPGMA + + (Drummond, Rodrigo 2000) + + o new function gap_as_state, treating gaps as a state. + + o new function keep_as_tip allows to prune a tree and keep nodes as a tip + + (suggested by Hedvig Skirgård) + + o nnls now can estimate trees with calibrations and tip dated trees + + o the functions Descendants, Ancestors, Siblings, mrca.phy now also accept a + + character vector for the node argument and not only integers. + + o Ancestral state reconstructionwas in parts rewritten. Output is now an + + object of class ancestral. E.g. ancestral states of constant sites are now + + always this state. + + o Improvements to several plot functions to get nicer plots out of the box. + + o Nicer defaults for plot.pml, mainly for rooted trees. + + o plotAnc got an argument scheme allowing to use different color schemes, + + especially for amino acid models shared with image.AAbin. + + o plot.networx gets two additional arguments, direction is in plot.phylo and + + angle to rotate the network. + + o new function composition_test comparing indicating possible problems + + with base / state composition. + OTHER CHANGES @@ -12,11 +53,17 @@ OTHER CHANGES which were implicitly using "estimated". - o some improvements to pmlPart + o some improvements to pmlPart. + + o the sankoff algorithm has been rewritten. + +BUG FIXES + + o pml_bb roots the tree, if method is either ultrametric or tipdated. Fixes - o nicer defaults for plot.pml + problem when modelTest object was supplied. - o the sankoff algorithm has been rewritten + o nnls.tree now checks if the tree has singletons. CHANGES in PHANGORN VERSION 2.11.0 diff --git a/R/Coalescent.R b/R/Coalescent.R index 2cff2f11..21925ad1 100644 --- a/R/Coalescent.R +++ b/R/Coalescent.R @@ -68,8 +68,13 @@ comp2 <- function(x, y) { #' @references Liu, L., Yu, L. and Pearl, D. K. (2010) Maximum tree: a #' consistent estimator of the species tree. \emph{Journal of Mathematical #' Biology}, \bold{60}, 95--106. +#' @examples +#' ## example in Liu et al. (2010) +#' tr1 <- read.tree(text = "(((B:0.05,C:0.05):0.01,D:0.06):0.04,A:0.1);") +#' tr2 <- read.tree(text = "(((A:0.07,C:0.07):0.02,D:0.09):0.03,B:0.12);") +#' TR <- c(tr1, tr2) +#' sp_tree <- coalSpeciesTree(TR) #' @keywords cluster -#' #' @export coalSpeciesTree <- function(tree, X = NULL, sTree = NULL) { if (is.null(X)) return(speciesTree(tree)) @@ -98,8 +103,7 @@ coalSpeciesTree <- function(tree, X = NULL, sTree = NULL) { Labels = attr(X, "levels"), Size = l, class = "dist", Diag = FALSE, Upper = FALSE ) - - sTree <- upgma(dm, "single") + sTree <- as.phylo(hclust(dm, method = "single")) # dm of pairwise states } else { diff --git a/R/Densi.R b/R/Densi.R index 5b4b2a7e..8708fe78 100644 --- a/R/Densi.R +++ b/R/Densi.R @@ -12,12 +12,12 @@ getAges <- function(x) { add_tiplabels <- function(xy, tip.label, direction, adj, font, srt = 0, cex = 1, - col = 1, label.offset = 0) { + col = 1, label.offset = 0, maxBT=1) { direction <- match.arg(direction, c("rightwards", "leftwards", "upwards", "downwards")) horizontal <- direction %in% c("rightwards", "leftwards") nTips <- length(tip.label) - xx <- rep(1, nrow(xy)) + xx <- rep(maxBT, nrow(xy)) yy <- xy[, 2 ] if (direction == "leftwards" | direction == "downwards") xx <- xx * 0 if (!horizontal) { @@ -91,10 +91,14 @@ add_tiplabels <- function(xy, tip.label, direction, adj, font, srt = 0, cex = 1, #' @param scale.bar a logical specifying whether add scale.bar to the plot. #' @param jitter allows to shift trees. a list with two arguments: the amount of #' jitter and random or equally spaced (see details below) +#' @param tip.dates A named vector of sampling times associated with the tips. +#' @param xlim the x limits of the plot. +#' @param ylim the y limits of the plot. #' @param \dots further arguments to be passed to plot. +#' @returns \code{densiTree} returns silently x. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{plot.phylo}}, \code{\link{plot.networx}}, -#' \code{\link{jitter}} +#' \code{\link{jitter}}, \code{\link{rtt}} #' @references densiTree is inspired from the great #' \href{https://www.cs.auckland.ac.nz/~remco/DensiTree/}{DensiTree} program of #' Remco Bouckaert. @@ -123,12 +127,13 @@ add_tiplabels <- function(xy, tip.label, direction, adj, font, srt = 0, cex = 1, #' } #' #' @export -densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), - consensus = NULL, direction = "rightwards", optim = FALSE, - scaleX = FALSE, col = 1, width = 1, lty = 1, cex = .8, - font = 3, tip.color = 1, adj = 0, srt = 0, - underscore = FALSE, label.offset = 0, scale.bar = TRUE, - jitter = list(amount = 0, random = TRUE), ...) { +densiTree <- function(x, type = "phylogram", ..., alpha = 1 / length(x), + consensus = NULL, direction = "rightwards", optim = FALSE, + scaleX = FALSE, col = 1, width = 1, lty = 1, cex = .8, + font = 3, tip.color = 1, adj = 0, srt = 0, + underscore = FALSE, label.offset = 0, scale.bar = TRUE, + jitter = list(amount = 0, random = TRUE), tip.dates=NULL, + xlim=NULL, ylim=NULL) { if (!inherits(x, "multiPhylo")) stop("x must be of class multiPhylo") if (is.character(consensus)) { consensus <- stree(length(consensus), tip.label = consensus) @@ -158,51 +163,70 @@ densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), nTip <- as.integer(length(consensus$tip.label)) consensus <- sort_tips(consensus) consensus <- reorder(consensus, "postorder") - + at <- NULL maxBT <- max(getAges(x)) - if (scaleX) maxBT <- 1.0 - label <- rev(pretty(c(maxBT, 0))) - maxBT <- max(label) + if(!is.null(tip.dates)){ + root_time <- max(tip.dates) - maxBT + label <- pretty(c(root_time, max(tip.dates)), min.n = 3) + label <- label[label < max(tip.dates)] + maxBT <- max(maxBT, max(tip.dates) - min(label)) + at <- maxBT - (max(tip.dates) - label) #/ maxBT + if(direction=="leftwards" || direction=="downwards") at <- at + maxBT - max(at) + scaleX <- FALSE + } + else { + if (scaleX) maxBT <- 1.0 + label <- rev(pretty(c(maxBT, 0))) + maxBT <- max(label, maxBT) + at <- seq(0, maxBT, length.out = length(label)) + } xy <- plotPhyloCoor(consensus, direction = direction, ...) yy <- xy[, 2] - tl <- which.max(nchar(consensus$tip.label)) if(horizontal) pin1 <- par("pin")[1] else pin1 <- par("pin")[2] - sw <- strwidth(consensus$tip.label[tl], "inch", cex = cex) / pin1 * 1.1 - + sw <- strwidth(consensus$tip.label[tl], "inch", cex = cex) / pin1 * 1.1 * maxBT + if(is.null(xlim)){ + xlim <- switch(direction, + rightwards = c(0, maxBT + sw), + leftwards = c(0 - sw, maxBT), + downwards = c(0, nTip + 1), + upwards = c(0, nTip + 1)) + } + if(is.null(ylim)){ + ylim <- switch(direction, + rightwards = c(0, nTip + 1), + leftwards = c(0, nTip + 1), + downwards = c(0 - sw, maxBT), + upwards = c(0, maxBT + sw)) + } if (direction == "rightwards") { - plot.default(0, type = "n", xlim = c(0, 1.0 + sw), ylim = c(0, nTip + 1), + plot.default(0, type = "n", xlim = xlim, ylim = ylim, xlab = "", ylab = "", axes = FALSE, ...) - if (scale.bar) axis(side = 1, at = seq(0, 1.0, length.out = length(label)), - labels = label) + if (scale.bar) axis(side = 1, at = at, labels = label, cex.axis=cex) } if (direction == "leftwards") { - plot.default(0, type = "n", xlim = c(0 - sw, 1.0), ylim = c(0, nTip + 1), + plot.default(0, type = "n", xlim = xlim, ylim = ylim, xlab = "", ylab = "", axes = FALSE, ...) - if (scale.bar) axis(side = 1, at = seq(0, 1.0, length.out = length(label)), - labels = rev(label)) + if (scale.bar) axis(side = 1, at = at, labels = rev(label), cex.axis=cex) } if (direction == "downwards") { - plot.default(0, type = "n", xlim = c(0, nTip + 1), ylim = c(0 - sw, 1.0), + plot.default(0, type = "n", xlim = xlim, ylim = ylim, xlab = "", ylab = "", axes = FALSE, ...) - if (scale.bar) axis(side = 2, at = seq(0, 1.0, length.out = length(label)), - labels = rev(label)) + if (scale.bar) axis(side = 2, at = at, labels = rev(label), cex.axis=cex) } if (direction == "upwards") { - plot.default(0, type = "n", xlim = c(0, nTip + 1), ylim = c(0, 1.0 + sw), + plot.default(0, type = "n", xlim = xlim, ylim = ylim, xlab = "", ylab = "", axes = FALSE, ...) - if (scale.bar) axis(side = 2, at = seq(0, 1.0, length.out = length(label)), - labels = label) + if (scale.bar) axis(side = 2, at = at, labels = label, cex.axis=cex) } tip_labels <- consensus$tip.label if (is.expression(consensus$tip.label)) underscore <- TRUE if (!underscore) tip_labels <- gsub("_", " ", tip_labels) - add_tiplabels(xy, tip_labels, direction, adj = adj, font = font, srt = srt, - cex = cex, col = tip.color, label.offset = label.offset) + cex = cex, col = tip.color, label.offset = label.offset, maxBT = maxBT) col <- rep(col, length.out = length(x)) tiporder <- 1:nTip @@ -224,14 +248,14 @@ densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), if (horizontal) { if (scaleX) xx <- xx / max(xx) - else xx <- xx / maxBT - if (direction == "rightwards") xx <- xx + (1.0 - max(xx)) + else xx <- xx #/ maxBT + if (direction == "rightwards") xx <- xx + (maxBT - max(xx)) if (jitter$amount > 0) yy <- yy + jit[treeindex] } else { if (scaleX) yy <- yy / max(yy) - else yy <- yy / maxBT - if (direction == "upwards") yy <- yy + (1.0 - max(yy)) + #else yy <- yy + if (direction == "upwards") yy <- yy + (maxBT - max(yy)) if (jitter$amount > 0) xx <- xx + jit[treeindex] } e1 <- tmp$edge[, 1] @@ -246,4 +270,13 @@ densiTree <- function(x, type = "cladogram", alpha = 1 / length(x), edge.lty = lty) } } + L <- list(type = type, font = font, cex = cex, + adj = adj, srt = srt, #no.margin = no.margin, + label.offset = label.offset, + x.lim = xlim, y.lim = ylim, direction = direction, + tip.color = tip.color, Ntip = nTip #, Nnode = Nnode, + #root.time = x$root.time, align.tip.label = align.tip.label + ) + assign("last_plot.phylo", L, envir = .PlotPhyloEnv) + invisible(x) } diff --git a/R/ancestral.R b/R/ancestral.R new file mode 100644 index 00000000..ce2a45d9 --- /dev/null +++ b/R/ancestral.R @@ -0,0 +1,548 @@ +#' Ancestral character reconstruction. +#' +#' Marginal reconstruction of the ancestral character states. +#' +#' The argument "type" defines the criterion to assign the internal nodes. For +#' \code{ancestral.pml} so far "ml and marginal (empirical) "bayes" and for +#' \code{ancestral.pars} "MPR" and "ACCTRAN" are possible. +#' +#' The function return a list containing the tree with node labels, the original +#' alignment as an \code{phyDat} object, a data.frame containing the +#' probabilities belonging to a state for all (internal nodes) and the most +#' likely state. For parsimony and nucleotide data the most likely state might +#' be ambiguous. For ML this is very unlikely to be the case. +#' +#' If the input tree does not contain unique node labels the function +#' \code{ape::MakeNodeLabel} is used to create them. +#' +#' With parsimony reconstruction one has to keep in mind that there will be +#' often no unique solution. +#' +#' The functions use the node labels of the provided tree (also if part of the +#' \code{pml} object) if these are unique. Otherwise the function +#' \code{ape::MakeNodeLabel} is used to create them. +#' +#' For further details see vignette("Ancestral"). +#' +#' @param object an object of class pml +#' @param tree a tree, i.e. an object of class pml +#' @param data an object of class phyDat +#' @param type method used to assign characters to internal nodes, see details. +#' @param cost A cost matrix for the transitions between two states. +## @param return return a \code{phyDat} object or matrix of probabilities. +## @param x an object of class ancestral. +#' @param \dots Further arguments passed to or from other methods. +#' @return An object of class ancestral. This is a list containing the tree with +#' node labels, the original alignment as an \code{phyDat} object, a +#' \code{data.frame} containing the probabilities belonging to a state for all +#' (internal nodes) and the most likely state. +## For \code{return="phyDat"} an object of class "phyDat", containing +## the ancestral states of all nodes. For nucleotide data this can contain +## ambiguous states. Apart from fitch parsimony the most likely states are +## returned. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, +#' \code{\link{plotAnc}}, \code{\link{ltg2amb}}, \code{\link{latag2n}}, +#' \code{\link{gap_as_state}}, \code{\link[ape]{root}}, +#' \code{\link[ape]{makeNodeLabel}} +#' @references Felsenstein, J. (2004). \emph{Inferring Phylogenies}. Sinauer +#' Associates, Sunderland. +#' +#' Swofford, D.L., Maddison, W.P. (1987) Reconstructing ancestral character +#' states under Wagner parsimony. \emph{Math. Biosci.} \bold{87}: 199--229 +#' +#' Yang, Z. (2006). \emph{Computational Molecular evolution}. Oxford University +#' Press, Oxford. +#' @keywords cluster +#' @importFrom fastmatch fmatch +#' @examples +#' +#' example(NJ) +#' # generate node labels to ensure plotting will work +#' tree <- makeNodeLabel(tree) +#' fit <- pml(tree, Laurasiatherian) +#' anc.ml <- ancestral.pml(fit, type = "ml") +#' anc.p <- ancestral.pars(tree, Laurasiatherian) +## \dontrun{ +## require(seqLogo) +## seqLogo( t(subset(anc.ml, 48, 1:20)[[1]]), ic.scale=FALSE) +## seqLogo( t(subset(anc.p, 48, 1:20)[[1]]), ic.scale=FALSE) +## } +#' # plot ancestral sequences at the root +#' plotSeqLogo( anc.ml, 48, 1, 20) +#' plotSeqLogo( anc.p, 48, 1, 20) +#' # plot the first character +#' plotAnc(anc.ml) +#' # plot the third character +#' plotAnc(anc.ml, 3) +#' +#' @rdname ancestral.pml +#' @export +ancestral.pml <- function(object, type = "marginal", ...) { + call <- match.call() + pt <- match.arg(type, c("marginal", "ml", "bayes")) # "joint", + tree <- object$tree + INV <- object$INV + inv <- object$inv + data <- getCols(object$data, tree$tip.label) + data_type <- attr(data, "type") + if (is.null(attr(tree, "order")) || attr(tree, "order") != "postorder") { + tree <- reorder(tree, "postorder") + } + nTips <- length(tree$tip.label) + node <- tree$edge[, 1] + edge <- tree$edge[, 2] + nNode <- Nnode(tree) + m <- length(edge) + 1 # max(edge) + w <- object$w + g <- object$g + l <- length(w) + nr <- attr(data, "nr") + nc <- attr(data, "nc") + dat <- vector(mode = "list", length = m * l) + result <- vector(mode = "list", length = nNode) + result2 <- vector(mode = "list", length = nNode) + dim(dat) <- c(l, m) + node_label <- makeAncNodeLabel(tree, ...) + tree$node.label <- node_label + tmp <- length(data) + + eig <- object$eig + + bf <- object$bf + el <- tree$edge.length + P <- getP(el, eig, g) + nr <- as.integer(attr(data, "nr")) + nc <- as.integer(attr(data, "nc")) + node <- as.integer(node - min(node)) + edge <- as.integer(edge - 1) + nTips <- as.integer(length(tree$tip.label)) + mNodes <- as.integer(max(node) + 1) + contrast <- attr(data, "contrast") + # proper format + eps <- 1.0e-5 + attrib <- attributes(data) + pos <- match(attrib$levels, attrib$allLevels) + nco <- as.integer(dim(contrast)[1]) + for (i in 1:l) dat[i, (nTips + 1):m] <- .Call('LogLik2', data, P[i, ], nr, nc, + node, edge, nTips, mNodes, contrast, nco) + parent <- tree$edge[, 1] + child <- tree$edge[, 2] + nTips <- min(parent) - 1 + # in C with scaling + for (i in 1:l) { + for (j in (m - 1):1) { + if (child[j] > nTips) { + tmp2 <- (dat[[i, parent[j]]] / (dat[[i, child[j]]] %*% P[[i, j]])) + dat[[i, child[j]]] <- (tmp2 %*% P[[i, j]]) * dat[[i, child[j]]] + } + } + } + for (j in unique(parent)) { + tmp <- matrix(0, nr, nc) + if (inv > 0) tmp <- as.matrix(INV) * inv + for (i in 1:l) { + # scaling!!! + tmp <- tmp + w[i] * dat[[i, j]] + } + if ((pt == "bayes") || (pt == "marginal")) tmp <- tmp * rep(bf, each = nr) + tmp <- tmp / rowSums(tmp) + + if (data_type == "DNA") { + tmp_max <- p2dna(tmp) + tmp_max <- fitchCoding2ambiguous(tmp_max) + } + else { + tmp_max <- pos[max.col(tmp)] + } + result[[j - nTips]] <- tmp + result2[[j - nTips]] <- tmp_max + } + ind <- identical_sites(data) + if(length(ind)>0){ + for(k in seq_len(nNode)){ + result[[k]][ind,] <- contrast[data[[1]][ind],] + result2[[k]][ind] <- data[[1]][ind] + } + } + attrib$names <- node_label + attributes(result2) <- attrib + attributes(result) <- attrib + result <- list2df_ancestral(result, result2) + result2 <- compress.phyDat(result2) + erg <- list(tree=tree, data=data, prob=result, state=result2) + class(erg) <- "ancestral" + erg +} + + +#' Export and convenience functions for ancestral reconstructions +#' +#' \code{write.ancestral} allows to export ancestral reconstructions. It writes +#' out the tree, a tab delimited text file with the probabilities and the +#' alignment. \code{ancestral} generates an object of class ancestral. +#' +#' This allows also to read in reconstruction made by iqtree to use the +#' plotting capabilities of R. +#' @param x an object of class ancestral. +#' @param file a file name. File endings are added. +#' @param ... Further arguments passed to or from other methods. +#' @returns \code{write.ancestral} returns the input x invisibly. +#' @seealso \code{\link{ancestral.pml}}, \code{\link{plotAnc}} +#' @examples +#' data(Laurasiatherian) +#' fit <- pml_bb(Laurasiatherian[,1:100], "JC", rearrangement = "none") +#' anc_ml <- ancestral.pml(fit) +#' write.ancestral(anc_ml) +#' # Can be also results from iqtree +#' align <- read.phyDat("ancestral_align.fasta") +#' tree <- read.tree("ancestral_tree.nwk") +#' df <- read.table("ancestral.state", header=TRUE) +#' anc_ml_disc <- ancestral(tree, align, df) +#' plotAnc(anc_ml_disc, 20) +#' unlink(c("ancestral_align.fasta", "ancestral_tree.nwk", "ancestral.state")) +#' @rdname write.ancestral +#' @export +write.ancestral <- function(x, file="ancestral"){ + stopifnot(inherits(x, "ancestral")) + write.phyDat(x$data, file=paste0(file, "_align.fasta")) + write.table(x$prob, file=paste0(file, ".state"), quote=FALSE, row.names=FALSE, + sep="\t") + write.tree(x$tree, file=paste0(file, "_tree.nwk")) + invisible(x) +} + + +#' @param tree an object of class phylo. +#' @param align an object of class phyDat. +#' @param prob an data.frame containing a matrix of posterior probabilities for +#' each state and site. +#' @importFrom utils head +#' @rdname write.ancestral +#' @export +ancestral <- function(tree, align, prob){ + stopifnot(inherits(tree, "phylo")) + stopifnot(inherits(align, "phyDat")) + stopifnot(inherits(prob, "data.frame")) + if(is.null(tree$node.label))stop("tree needs node.label") + state <- extract_states(prob, attr(align, "type"), + levels=attr(align, "levels")) + erg <- list(tree=tree, data=align[tree$tip.label], prob=prob, + state=state[tree$node.label]) + class(erg) <- "ancestral" + erg +} + + +extract_states <- function(x, type, levels=NULL){ + node_label <- unique(x$"Node") + y <- matrix(x$"State", nrow=length(node_label), byrow=TRUE, + dimnames = list(node_label, NULL)) + if(type %in% c("DNA", "AA")) return( phyDat(y, type=type) ) + phyDat(y, type=type, levels=levels) +} + +#' @rdname write.ancestral +#' @export +print.ancestral <- function(x, ...){ + stopifnot(inherits(x, "ancestral")) + print(x$tree) + cat("\n") + print(x$data) + cat("\n") + print(head(x$prob)) +} + +#' @rdname ancestral.pml +#' @param x an object of class ancestral +#' @export +as.phyDat.ancestral <- function(x, ...) { + rbind(x$data, x$state) +} + + + +highest_state <- function(x, ...) { + type <- attr(x, "type") + fun2 <- function(x) { + x <- p2dna(x) + fitchCoding2ambiguous(x) + } + if (type == "DNA" && !has_gap_state(x)) { + res <- lapply(x, fun2) + } + else { + eps <- 1.0e-5 + contr <- attr(x, "contrast") + attr <- attributes(x) + pos <- match(attr$levels, attr$allLevels) + res <- lapply(x, function(x, pos) pos[max.col(x)], pos) + } + attributes(res) <- attributes(x) + class(res) <- "phyDat" + return(res) +} + + +list2df_ancestral <- function(x, y=NULL, ...) { +# stopifnot(inherits(x, "ancestral")) + l <- length(x) + nr <- attr(x, "nr") + nc <- attr(x, "nc") + index <- attr(x, "index") + nr <- length(index) + nam <- names(x) + X <- matrix(0, l*length(index), nc) + j <- 0 + for(i in seq_len(l)){ + X[(j+1):(j+nr), ] <- x[[i]][index, ] + j <- j + nr + } + if(!is.null(y) & inherits(y, "phyDat")){ + y <- y[names(x)] + Y <- unlist(as.data.frame(y)) + res <- data.frame(Node=rep(nam, each=nr), Site=rep(seq_len(nr), l), Y, X) + colnames(res) <- c("Node", "Site", "State", paste0("p_", attr(x, "levels"))) + } else{ + res <- data.frame(Node=rep(nam, each=nr), Site=rep(seq_len(nr), l), X) + colnames(res) <- c("Node", "Site", paste0("p_", attr(x, "levels"))) + } + rownames(res) <- NULL + res +} + + +fitchCoding2ambiguous <- function(x, type = "DNA") { + y <- c(1L, 2L, 4L, 8L, 8L, 3L, 5L, 9L, 6L, 10L, 12L, 7L, 11L, 13L, + 14L, 15L, 15L, 15L) + fmatch(x, y) +} + + +#' @rdname ancestral.pml +#' @export +ancestral.pars <- function(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), + cost = NULL, ...) { + #, return = "prob" + call <- match.call() + type <- match.arg(type) + tree$node.label <- makeAncNodeLabel(tree, ...) + contrast <- attr(data, "contrast") + data <- data[tree$tip.label,] + if (type == "ACCTRAN" || type=="POSTORDER") { + #, return = return + res <- ptree(tree, data, acctran=(type == "ACCTRAN")) +# attr(res, "call") <- call + } + if (type == "MPR") { + res <- mpr(tree, data, cost = cost) + #, return = return +# attr(res, "call") <- call + } + result <- res[[1]] + result2 <- res[[2]] + ind <- identical_sites(data) + if(length(ind)>0){ + for(k in seq_len(Nnode(tree))){ + result[[k]][ind,] <- contrast[data[[1]][ind],] + result2[[k]][ind] <- data[[1]][ind] + } + } +# attrib$names <- node_label +# attributes(result2) <- attrib +# attributes(result) <- attrib + result <- list2df_ancestral(result, result2) + result2 <- compress.phyDat(result2) # needed? + erg <- list(tree=tree, data=data, prob=result, state=result2) + class(erg) <- "ancestral" + erg +} + + +#' @rdname ancestral.pml +#' @export +pace <- ancestral.pars + + +mpr.help <- function(tree, data, cost = NULL) { + tree <- reorder(tree, "postorder") + if (!inherits(data, "phyDat")) stop("data must be of class phyDat") + levels <- attr(data, "levels") + l <- length(levels) + if (is.null(cost)) { + cost <- matrix(1, l, l) + cost <- cost - diag(l) + } + dat <- prepareDataSankoff(data) + datp <- pnodes(tree, dat, cost) + nr <- attr(data, "nr") + nc <- attr(data, "nc") + node <- as.integer(tree$edge[, 1] - 1L) + edge <- as.integer(tree$edge[, 2] - 1L) + res <- .Call('sankoffMPR', datp, as.numeric(cost), as.integer(nr), + as.integer(nc), node, edge, as.integer(Nnode(tree))) + root <- getRoot(tree) + res[[root]] <- datp[[root]] + res +} + +# , return = "prob" +mpr <- function(tree, data, cost = NULL, ...) { + data <- subset(data, tree$tip.label) + att <- attributes(data) + att$names <- tree$node.label + type <- att$type + nr <- att$nr + nc <- att$nc + res <- mpr.help(tree, data, cost) + l <- length(tree$tip.label) + m <- length(res) + nNode <- Nnode(tree) + ntips <- length(tree$tip.label) + contrast <- att$contrast + eps <- 5e-6 + rm <- apply(res[[ntips + 1]], 1, min) + RM <- matrix(rm, nr, nc) + eps + + fun <- function(X) { + rs <- rowSums(X) # apply(X, 1, sum) + X / rs + } +# for (i in 1:ntips) res[[i]] <- contrast[data[[i]], , drop = FALSE] + for (i in (ntips + 1):m) res[[i]][] <- as.numeric(res[[i]] < RM) + res <- res[(ntips + 1):m] +# if (return == "prob") { + # for(i in 1:ntips) res[[i]] <- contrast[data[[i]],,drop=FALSE] + res_prob <- lapply(res, fun) + attributes(res_prob) <- att + class(res_prob) <- c("ancestral", "phyDat") +# } + # else res[1:ntips] <- data[1:ntips] + fun2 <- function(x) { + x <- p2dna(x) + fitchCoding2ambiguous(x) + } +# if (return != "prob") { + if (type == "DNA") { + res_state <- lapply(res, fun2) + attributes(res_state) <- att + } + else { + attributes(res) <- att + res_state <- highest_state(res) + attributes(res_state) <- att + } +# res[1:ntips] <- data +# } + list(res_prob, res_state) +} + + +# +# ACCTRAN +# +acctran2 <- function(tree, data) { + if(!is.binary(tree)) tree <- multi2di(tree) + tree <- reorder(tree, "postorder") + edge <- tree$edge + data <- subset(data, tree$tip.label) + f <- init_fitch(data, FALSE, FALSE, m=2L) + psc_node <- f$pscore_node(edge) + tmp <- reorder(tree)$edge + tmp <- tmp[tmp[,2]>Ntip(tree), ,drop=FALSE] + f$traverse(edge) + if(length(tmp)>0)f$acctran_traverse(tmp) + psc <- f$pscore_acctran(edge) + el <- psc + parent <- unique(edge[,1]) + desc <- Descendants(tree, parent, "children") + for(i in seq_along(parent)){ + x <- psc_node[parent[i]] -sum(psc[desc[[i]]]) + if(x>0) el[desc[[i]] ] <- el[desc[[i]] ] + x/length(desc[[i]]) + } + tree$edge.length <- el[edge[,2]] + tree +} + + +#' @rdname parsimony +#' @export +acctran <- function(tree, data) { + if (inherits(tree, "multiPhylo")) { + compress <- FALSE + if (!is.null(attr(tree, "TipLabel"))){ + compress <- TRUE + tree <- .uncompressTipLabel(tree) + } + res <- lapply(tree, acctran2, data) + class(res) <- "multiPhylo" + if (compress) res <- .compressTipLabel(res) + return(res) + } + acctran2(tree, data) +} + +#, return = "prob" +ptree <- function(tree, data, acctran=TRUE, ...) { + tree <- reorder(tree, "postorder") + data <- subset(data, tree$tip.label) + edge <- tree$edge + att <- attributes(data) + att$names <- tree$node.label + nr <- att$nr + type <- att$type + m <- max(edge) + nNode <- Nnode(tree) + nTip <- Ntip(tree) + f <- init_fitch(data, FALSE, FALSE, m=2L) + f$traverse(edge) + tmp <- reorder(tree)$edge + tmp <- tmp[tmp[,2]>Ntip(tree),] + if(length(tmp)>0 && acctran==TRUE)f$acctran_traverse(tmp) + res <- res_state <- vector("list", nNode) +# res <- vector("list", m) + att$names <- tree$node.label #makeAncNodeLabel(tree, ...) +# else { + fun <- function(X) { + rs <- rowSums(X) + X / rs + } + contrast <- att$contrast +# for(i in seq_len(nTip)) res[[i]] <- contrast[data[[i]], , drop=FALSE] +# for(i in (nTip+1):m) res[[i]] <- f$getAnc(i)[1:nr, , drop=FALSE] + for(i in seq_len(nNode)) res[[i]] <- f$getAnc(i+nTip)[seq_len(nr), , drop=FALSE] + res <- lapply(res, fun) + attributes(res) <- att + class(res) <- c("ancestral", "phyDat") +# } + if(type=="DNA"){ + indx <- c(1, 2, 6, 3, 7, 9, 12, 4, 8, 10, 13, 11, 14, 15, 16) + # res[1:nTip] <- data[1:nTip] + for(i in seq_len(nNode)) # (nTip+1):m) + res_state[[i]] <- indx[f$getAncAmb(i+nTip)[1:nr]] + attributes(res_state) <- att + # return(res) + } else{ + res_state <- highest_state(res) + class(res_state) <- "phyDat" + } + list(res, res_state) +} + + +makeAncNodeLabel <- function(tree, ...){ + if(!is.null(tree$node.label)){ + node_label <- tree$node.label + if(length(unique(node_label)) == Nnode(tree)) return(node_label) + else message("Node labels are not unique, used makeNodeLabel(tree, ...) to create them!") + } + tree <- makeNodeLabel(tree, ...) + tree$node.label +} + + +identical_sites <- function(x){ + res <- rep(TRUE, attr(x, "nr")) + for(i in seq_along(x)) res <- res & (x[[i]] == x[[1]]) + which(res) +} diff --git a/R/ancestral_pml.R b/R/ancestral_pml.R deleted file mode 100644 index 5e8ef258..00000000 --- a/R/ancestral_pml.R +++ /dev/null @@ -1,411 +0,0 @@ -# -# ancestral sequences ML -# - - -#' Ancestral character reconstruction. -#' -#' Marginal reconstruction of the ancestral character states. -#' -#' The argument "type" defines the criterion to assign the internal nodes. For -#' \code{ancestral.pml} so far "ml" and (empirical) "bayes" and for -#' \code{ancestral.pars} "MPR" and "ACCTRAN" are possible. -#' -#' With parsimony reconstruction one has to keep in mind that there will be -#' often no unique solution. -#' -#' For further details see vignette("Ancestral"). -#' -#' @param object an object of class pml -#' @param tree a tree, i.e. an object of class pml -#' @param data an object of class phyDat -#' @param type method used to assign characters to internal nodes, see details. -#' @param i plots the i-th site pattern of the \code{data}. -#' @param col a vector containing the colors for all possible states. -#' @param cex.pie a numeric defining the size of the pie graphs -#' @param pos a character string defining the position of the legend -#' @param cost A cost matrix for the transitions between two states. -#' @param return return a \code{phyDat} object or matrix of probabilities. -#' @param \dots Further arguments passed to or from other methods. -#' @return %A matrix containing the the estimates character states. An object -#' of class "phyDat", containing the ancestral states of all nodes. -#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, -#' \code{\link[ape]{root}} -#' @references Felsenstein, J. (2004). \emph{Inferring Phylogenies}. Sinauer -#' Associates, Sunderland. -#' -#' Swofford, D.L., Maddison, W.P. (1987) Reconstructing ancestral character -#' states under Wagner parsimony. \emph{Math. Biosci.} \bold{87}: 199--229 -#' -#' Yang, Z. (2006). \emph{Computational Molecular evolution}. Oxford University -#' Press, Oxford. -#' @keywords cluster -#' @importFrom fastmatch fmatch -#' @examples -#' -#' example(NJ) -#' fit <- pml(tree, Laurasiatherian) -#' anc.ml <- ancestral.pml(fit, type = "ml") -#' anc.p <- ancestral.pars(tree, Laurasiatherian) -#' \dontrun{ -#' require(seqLogo) -#' seqLogo( t(subset(anc.ml, 48, 1:20)[[1]]), ic.scale=FALSE) -#' seqLogo( t(subset(anc.p, 48, 1:20)[[1]]), ic.scale=FALSE) -#' } -#' # plot the first site pattern -#' plotAnc(tree, anc.ml, 1) -#' # plot the third character -#' plotAnc(tree, anc.ml, attr(anc.ml, "index")[3]) -#' -#' @rdname ancestral.pml -#' @export -ancestral.pml <- function(object, type = "marginal", return = "prob") { - call <- match.call() - pt <- match.arg(type, c("marginal", "joint", "ml", "bayes")) - tree <- object$tree - INV <- object$INV - inv <- object$inv - data <- getCols(object$data, tree$tip.label) - data_type <- attr(data, "type") - if (is.null(attr(tree, "order")) || attr(tree, "order") != "postorder") { - tree <- reorder(tree, "postorder") - } - nTips <- length(tree$tip.label) - node <- tree$edge[, 1] - edge <- tree$edge[, 2] - m <- length(edge) + 1 # max(edge) - w <- object$w - g <- object$g - l <- length(w) - nr <- attr(data, "nr") - nc <- attr(data, "nc") - dat <- vector(mode = "list", length = m * l) - result <- vector(mode = "list", length = m) - dim(dat) <- c(l, m) - - x <- attributes(data) - label <- as.character(1:m) - nam <- tree$tip.label - label[seq_along(nam)] <- nam - x[["names"]] <- label - tmp <- length(data) - - if (return != "phyDat") { - result <- new2old.phyDat(data) - } else { - result[1:nTips] <- data - } - eig <- object$eig - - bf <- object$bf - el <- tree$edge.length - P <- getP(el, eig, g) - nr <- as.integer(attr(data, "nr")) - nc <- as.integer(attr(data, "nc")) - node <- as.integer(node - min(node)) - edge <- as.integer(edge - 1) - nTips <- as.integer(length(tree$tip.label)) - mNodes <- as.integer(max(node) + 1) - contrast <- attr(data, "contrast") - # proper format - eps <- 1.0e-5 - ind1 <- which(apply(contrast, 1, function(x) sum(x > eps)) == 1L) - ind2 <- which(contrast[ind1, ] > eps, arr.ind = TRUE) - - pos <- ind2[match(seq_len(ncol(contrast)), ind2[, 2]), 1] - nco <- as.integer(dim(contrast)[1]) - for (i in 1:l) dat[i, (nTips + 1):m] <- .Call('LogLik2', data, P[i, ], nr, nc, - node, edge, nTips, mNodes, contrast, nco) - - parent <- tree$edge[, 1] - child <- tree$edge[, 2] - nTips <- min(parent) - 1 - # in C with scaling - for (i in 1:l) { - for (j in (m - 1):1) { - if (child[j] > nTips) { - tmp2 <- (dat[[i, parent[j]]] / (dat[[i, child[j]]] %*% P[[i, j]])) - dat[[i, child[j]]] <- (tmp2 %*% P[[i, j]]) * dat[[i, child[j]]] - } - } - } - for (j in unique(parent)) { - tmp <- matrix(0, nr, nc) - if (inv > 0) tmp <- as.matrix(INV) * inv - for (i in 1:l) { - # scaling!!! - tmp <- tmp + w[i] * dat[[i, j]] - } - if ((pt == "bayes") || (pt == "marginal")) tmp <- tmp * rep(bf, each = nr) - tmp <- tmp / rowSums(tmp) - - if (return == "phyDat") { - if (data_type == "DNA") { - tmp <- p2dna(tmp) - tmp <- fitchCoding2ambiguous(tmp) - } - else { - tmp <- pos[max.col(tmp)] - } - } - result[[j]] <- tmp - } - attributes(result) <- x - attr(result, "call") <- call - result -} - - -# joint_reconstruction <- function(object){ -# -# } - - -# in mpr -ancestral2phyDat <- function(x) { - eps <- 1.0e-5 - contr <- attr(x, "contrast") - # a bit too complicated - ind1 <- which(apply(contr, 1, function(x) sum(x > eps)) == 1L) - ind2 <- which(contr[ind1, ] > eps, arr.ind = TRUE) - # pos <- ind2[match(as.integer(1L:ncol(contr)), ind2[,2]),1] - pos <- ind2[match(seq_len(ncol(contr)), ind2[, 2]), 1] - # only first hit - res <- lapply(x, function(x, pos) pos[max.col(x)], pos) - attributes(res) <- attributes(x) - return(res) -} - - -fitchCoding2ambiguous <- function(x, type = "DNA") { - y <- c(1L, 2L, 4L, 8L, 8L, 3L, 5L, 9L, 6L, 10L, 12L, 7L, 11L, 13L, - 14L, 15L, 15L, 15L) - fmatch(x, y) -} - - -#' @rdname ancestral.pml -#' @export -ancestral.pars <- function(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), - cost = NULL, return = "prob") { - call <- match.call() - type <- match.arg(type) - if (type == "ACCTRAN" || type=="POSTORDER") { - res <- ptree(tree, data, return = return, acctran=(type == "ACCTRAN")) - attr(res, "call") <- call - } - if (type == "MPR") { - res <- mpr(tree, data, cost = cost, return = return) - attr(res, "call") <- call - } - res -} - - -#' @rdname ancestral.pml -#' @export -pace <- ancestral.pars - - -mpr.help <- function(tree, data, cost = NULL) { - tree <- reorder(tree, "postorder") - if (!inherits(data, "phyDat")) { - stop("data must be of class phyDat") - } - levels <- attr(data, "levels") - l <- length(levels) - if (is.null(cost)) { - cost <- matrix(1, l, l) - cost <- cost - diag(l) - } - dat <- prepareDataSankoff(data) - datp <- pnodes(tree, dat, cost) - nr <- attr(data, "nr") - nc <- attr(data, "nc") - node <- as.integer(tree$edge[, 1] - 1L) - edge <- as.integer(tree$edge[, 2] - 1L) - res <- .Call('sankoffMPR', datp, as.numeric(cost), as.integer(nr), - as.integer(nc), node, edge, as.integer(Nnode(tree))) - root <- getRoot(tree) - res[[root]] <- datp[[root]] - res -} - - -mpr <- function(tree, data, cost = NULL, return = "prob") { - data <- subset(data, tree$tip.label) - att <- attributes(data) - type <- att$type - nr <- att$nr - nc <- att$nc - res <- mpr.help(tree, data, cost) - l <- length(tree$tip.label) - m <- length(res) - label <- as.character(1:m) - nam <- tree$tip.label - label[seq_along(nam)] <- nam - att[["names"]] <- label - ntips <- length(tree$tip.label) - contrast <- att$contrast - eps <- 5e-6 - rm <- apply(res[[ntips + 1]], 1, min) - RM <- matrix(rm, nr, nc) + eps - - fun <- function(X) { - rs <- rowSums(X) # apply(X, 1, sum) - X / rs - } - for (i in 1:ntips) res[[i]] <- contrast[data[[i]], , drop = FALSE] - for (i in (ntips + 1):m) res[[i]][] <- as.numeric(res[[i]] < RM) - if (return == "prob") { - # for(i in 1:ntips) res[[i]] <- contrast[data[[i]],,drop=FALSE] - if (return == "prob") res <- lapply(res, fun) - } - # else res[1:ntips] <- data[1:ntips] - attributes(res) <- att - fun2 <- function(x) { - x <- p2dna(x) - fitchCoding2ambiguous(x) - } - if (return != "prob") { - if (type == "DNA") { - res <- lapply(res, fun2) - attributes(res) <- att - } - else { - res <- ancestral2phyDat(res) - } - res[1:ntips] <- data - } - res -} - - -#' @rdname ancestral.pml -#' @param site.pattern logical, plot i-th site pattern or i-th site -#' @importFrom grDevices hcl.colors -#' @export -plotAnc <- function(tree, data, i = 1, site.pattern = TRUE, col = NULL, - cex.pie = par("cex"), pos = "bottomright", ...) { - y <- subset(data, select = i, site.pattern = site.pattern) - CEX <- cex.pie - xrad <- CEX * diff(par("usr")[1:2]) / 50 - levels <- attr(data, "levels") - nc <- attr(data, "nc") - y <- matrix(unlist(y[]), ncol = nc, byrow = TRUE) - l <- dim(y)[1] - dat <- matrix(0, l, nc) - for (i in 1:l) dat[i, ] <- y[[i]] - plot(tree, label.offset = 1.1 * xrad, plot = FALSE, ...) - lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) - XX <- lastPP$xx - YY <- lastPP$yy - xrad <- CEX * diff(lastPP$x.lim * 1.1) / 50 - par(new = TRUE) - plot(tree, label.offset = 1.1 * xrad, plot = TRUE, ...) - if (is.null(col)) col <- hcl.colors(nc) #rainbow(nc) - if (length(col) != nc) { - warning("Length of color vector differs from number of levels!") - } - BOTHlabels( - pie = y, XX = XX, YY = YY, adj = c(0.5, 0.5), frame = "rect", pch = NULL, - sel = seq_along(XX), thermo = NULL, piecol = col, col = "black", - bg = "lightblue", horiz = FALSE, width = NULL, height = NULL, cex = cex.pie - ) - if (!is.null(pos)) legend(pos, legend=levels, text.col = col) -} - -# -# ACCTRAN -# - - -acctran2 <- function(tree, data) { - if(!is.binary(tree)) tree <- multi2di(tree) - tree <- reorder(tree, "postorder") - edge <- tree$edge - data <- subset(data, tree$tip.label) - f <- init_fitch(data, FALSE, FALSE, m=2L) - psc_node <- f$pscore_node(edge) - tmp <- reorder(tree)$edge - tmp <- tmp[tmp[,2]>Ntip(tree), ,drop=FALSE] - f$traverse(edge) - if(length(tmp)>0)f$acctran_traverse(tmp) - psc <- f$pscore_acctran(edge) - el <- psc #[edge[,2]] - parent <- unique(edge[,1]) - desc <- Descendants(tree, parent, "children") - for(i in seq_along(parent)){ - x <- psc_node[parent[i]] -sum(psc[desc[[i]]]) - if(x>0) el[desc[[i]] ] <- el[desc[[i]] ] + x/length(desc[[i]]) - } - tree$edge.length <- el[edge[,2]] - tree -} - - -#' @rdname parsimony -#' @export -acctran <- function(tree, data) { - if (inherits(tree, "multiPhylo")) { - compress <- FALSE - if (!is.null(attr(tree, "TipLabel"))){ - compress <- TRUE - tree <- .uncompressTipLabel(tree) - } - res <- lapply(tree, acctran2, data) - class(res) <- "multiPhylo" - if (compress) res <- .compressTipLabel(res) - return(res) - } - acctran2(tree, data) -} - - -ptree <- function(tree, data, return = "prob", acctran=TRUE) { - tree <- reorder(tree, "postorder") - data <- subset(data, tree$tip.label) - edge <- tree$edge - att <- attributes(data) - nr <- att$nr - type <- att$type - m <- max(edge) - nTip <- Ntip(tree) - f <- init_fitch(data, FALSE, FALSE, m=2L) - f$traverse(edge) - tmp <- reorder(tree)$edge - tmp <- tmp[tmp[,2]>Ntip(tree),] - if(length(tmp)>0 && acctran==TRUE)f$acctran_traverse(tmp) - res <- vector("list", m) - att$names <- c(att$names, as.character((nTip+1):m)) - if(return == "phyDat"){ - if(type=="DNA"){ - indx <- c(1, 2, 6, 3, 7, 9, 12, 4, 8, 10, 13, 11, 14, 15, 16) - res[1:nTip] <- data[1:nTip] - for(i in (nTip+1):m) - res[[i]] <- indx[f$getAncAmb(i)[1:nr]] - } - else stop("This is only for nucleotide sequences supported so far") - } - else { - fun <- function(X) { - rs <- rowSums(X) - X / rs - } - contrast <- att$contrast - for(i in seq_len(nTip)) res[[i]] <- contrast[data[[i]], , drop=FALSE] - for(i in (nTip+1):m) res[[i]] <- f$getAnc(i)[1:nr, , drop=FALSE] - res <- lapply(res, fun) - } - attributes(res) <- att - res -} - -#parsimony.plot <- function(tree, ...) { -# x <- numeric(max(tree$edge)) -# x[tree$edge[, 2]] <- tree$edge.length -# plot(tree, ...) -# ind <- get("last_plot.phylo", envir = .PlotPhyloEnv)$edge[, 2] -# edgelabels(prettyNum(x[ind]), frame = "none") -#} diff --git a/R/bab.R b/R/bab.R index ff8b57b3..2b2a4801 100644 --- a/R/bab.R +++ b/R/bab.R @@ -16,12 +16,10 @@ getOrder <- function(x) { nr <- attr(x, "nr") storage.mode(nr) <- "integer" -# n <- length(x) #- 1L weight <- attr(x, "weight") storage.mode(weight) <- "double" -# m <- nr * (2L * nTips - 2L) f <- init_fitch(x, FALSE, FALSE, m=4L) edge <- tree$edge @@ -36,9 +34,7 @@ getOrder <- function(x) { while (length(remaining) > 0) { edge <- tree$edge[, 2] + 2 * nTips - f$prep_spr(tree$edge) - l <- length(remaining) res <- numeric(l) nt <- numeric(l) @@ -57,7 +53,21 @@ getOrder <- function(x) { } -pBound <- function(x, UB, LB) { +seq_stats <- function(x){ + nr <- attr(x, "nr") + contrast <- attr(x, "contrast") + a <- seq_len(nr) + STATE <- POS <- matrix(0L, nrow(contrast), nr) + for(i in seq_along(x)){ + IND <- cbind(x[[i]], a) + STATE[IND] <- STATE[IND] + 1L + POS[IND] <- i + } + list(state=STATE, position=POS) +} + +# Incompatibility lower Bound +ilb <- function(x, LB) { nr <- attr(x, "nr") contrast <- attr(x, "contrast") rownames(contrast) <- attr(x, "allLevels") @@ -70,6 +80,7 @@ pBound <- function(x, UB, LB) { singles <- attr(x, "levels") fun2 <- function(x, singles) all(x %in% singles) fun1 <- function(x) cumsum(!duplicated(x)) - 1L + fun3 <- function(x) sum(!duplicated(x)) - 1L tmp <- apply(y, 2, fun2, singles) ind <- which(tmp) @@ -77,32 +88,18 @@ pBound <- function(x, UB, LB) { y <- y[, ind, drop = FALSE] weight0 <- weight0[ind] -# print(sum(weight0)) - UB <- UB[, ind, drop = FALSE] - single_dis <- apply(y, 2, fun1) - # single_dis <- LB - + single_dis <- LB[, ind] nTips <- nrow(y) l <- length(weight0) res <- numeric(nTips) - for (i in 1:(l - 1)) { for (j in (i + 1):l) { - # cat(i, j, "\n") if ((weight0[i] > 0) & (weight0[j] > 0)) { z <- paste(y[, i], y[, j], sep = "_") dis2 <- single_dis[, i] + single_dis[, j] - # D1 <- (dis2[nTips] - dis2) dis <- fun1(z) - # dis <- pmax(dis, dis2) - # D2 <- dis[nTips] - (UB[, i] + UB[, j]) if (dis[nTips] > dis2[nTips]) { -# ub <- UB[, i] + UB[, j] -# dis <- dis[nTips] - ub -# d2 <- dis2[nTips] - dis2 -# dis <- pmax(dis, d2) - d2 - # dis <- pmax(dis, dis2) - dis2 - dis <- dis - dis2 + dis <- pmax(dis, dis2) - dis2 if (sum(dis[4:nTips]) > 0) { wmin <- min(weight0[i], weight0[j]) weight0[i] <- weight0[i] - wmin @@ -114,12 +111,10 @@ pBound <- function(x, UB, LB) { if(weight0[i] < 1e-6) break() } } -# print(sum(weight0)) res } - #' Branch and bound for finding all most parsimonious trees #' #' \code{bab} finds all most parsimonious trees. @@ -140,6 +135,8 @@ pBound <- function(x, UB, LB) { #' @param tree a phylogenetic tree an object of class phylo, otherwise a #' pratchet search is performed. #' @param trace defines how much information is printed during optimization. +## @param ILBound compute incompatibility lower bound (default TRUE) of +## Holland (2005). #' @param \dots Further arguments passed to or from other methods #' @return \code{bab} returns all most parsimonious trees in an object of class #' \code{multiPhylo}. @@ -165,16 +162,19 @@ pBound <- function(x, UB, LB) { #' gene12 <- yeast[, 1:3158] #' trees <- bab(gene12) #' -#' @export bab -bab <- function(data, tree = NULL, trace = 1, ...) { - if (!is.null(tree)) data <- subset(data, tree$tip.label) - pBound <- TRUE - +#' @export +bab <- function(data, tree = NULL, trace = 0, ...) { + if (hasArg(ILBound)) + ILBound <- list(...)$ILBound + else ILBound <- FALSE + if(inherits(data, "DNAbin") | inherits(data, "AAbin")) data <- as.phyDat(data) + if (!inherits(data, "phyDat")) stop("data must be of class phyDat") + compress <- TRUE + recursive <- TRUE nTips <- length(data) if (nTips < 4) return(stree(nTips, tip.label = names(data))) - # New - data <- removeParsimonyUninfomativeSites(data, recursive=TRUE) + data <- removeParsimonyUninfomativeSites(data, recursive=recursive) star_tree <- ifelse(attr(data, "nr") == 0, TRUE, FALSE) add_taxa <- ifelse(is.null(attr(data, "duplicated")), FALSE, TRUE) p0 <- attr(data, "p0") @@ -191,37 +191,32 @@ bab <- function(data, tree = NULL, trace = 1, ...) { # compress sequences (all transitions count equal) data <- compressSites(data) - o <- order(attr(data, "weight"), decreasing = TRUE) data <- subset(data, select = o, site.pattern=TRUE) - tree <- pratchet(data, start = tree, trace = trace - 1, ...) - - data <- subset(data, tree$tip.label) nr <- as.integer(attr(data, "nr")) inord <- getOrder(data) - nTips <- m <- length(data) + data <- data[inord,] + tree <- pratchet(data, start = tree, trace = trace - 1, maxit=10, + all=FALSE, ...) + p_vec <- fitch(tree, data, "site") - nr <- as.integer(attr(data, "nr")) - TMP <- UB <- matrix(0, m, nr) - for (i in 4:m) { - TMP[i, ] <- lowerBound(subset(data, inord[1:i])) - UB[i, ] <- upperBound(subset(data, inord[1:i])) + nTips <- m <- length(data) + TMP <- matrix(0, m, nr) # UB <- + for (i in 2:m) { + TMP[i, ] <- lowerBound(data[1:i,]) } - dat_used <- subset(data, inord) - weight <- as.double(attr(data, "weight")) m <- nr * (2L * nTips - 2L) - + # Single column discrepancy mmsAmb <- TMP %*% weight -# mmsAmb <- mmsAmb[nTips] - mmsAmb - mms0 <- 0 - if (pBound) mms0 <- pBound(dat_used, UB, TMP) - mms0 <- mms0 + mmsAmb + # mmsAmb <- mmsAmb[nTips] - mmsAmb + mms0 <- mms1 <- 0 + if (ILBound) mms1 <- ilb(data, TMP) + mms0 <- mms1 + mmsAmb mms0 <- mms0[nTips] - mms0 - mms0 <- c(mms0, 0) f <- init_fitch(data, m=4L) @@ -231,7 +226,7 @@ bab <- function(data, tree = NULL, trace = 1, ...) { if (trace > 1) print(paste("upper bound:", bound + p0)) startTree <- structure(list(edge = structure(c(rep(nTips + 1L, 3), - as.integer(inord)[1:3]), .Dim = c(3L, 2L)), tip.label = tree$tip.label, + as.integer(1:3)), .Dim = c(3L, 2L)), tip.label = names(data), Nnode = 1L), .Names = c("edge", "tip.label", "Nnode"), class = "phylo", order = "postorder") @@ -264,15 +259,15 @@ bab <- function(data, tree = NULL, trace = 1, ...) { edge <- tmpTree[, 2] + 2 * nTips f$prep_spr(tmpTree) - score <- f$pscore_vec(edge, as.integer(inord[a + 1L])) + score <- f$pscore_vec(edge, as.integer(a + 1L)) score <- score + blub + mms0[a + 1L] ms <- min(score) if (ms < bound + .1) { if ((a + 1L) < nTips) { ind <- (1:L[a])[score <= bound] trees[[a + 1]][seq_along(ind)] <- .Call('AddOnes', tmpTree, - as.integer(inord[a + 1L]), as.integer(ind), as.integer(L[a]), - as.integer(M[a])) + as.integer(a + 1L), as.integer(ind), as.integer(L[a]), + as.integer(M[a])) l <- length(ind) # os <- order(score[ind], decreasing=TRUE) os <- seq_len(l) @@ -286,8 +281,8 @@ bab <- function(data, tree = NULL, trace = 1, ...) { ind <- which(score == ms) tmp <- vector("list", length(ind)) tmp[seq_along(ind)] <- .Call('AddOnes', tmpTree, - as.integer(inord[a + 1L]), as.integer(ind), - as.integer(L[a]), as.integer(M[a])) + as.integer(a + 1L), as.integer(ind), + as.integer(L[a]), as.integer(M[a])) if (ms < bound) { bound <- ms if (trace) cat("upper bound:", bound + p0, "\n") @@ -301,12 +296,11 @@ bab <- function(data, tree = NULL, trace = 1, ...) { } for (i in seq_along(result)) { result[[i]] <- structure(list(edge = result[[i]], Nnode = nTips - 2L), - .Names = c("edge", "Nnode"), class = "phylo", order = "postorder") + .Names = c("edge", "Nnode"), class = "phylo", order = "postorder") } attr(result, "TipLabel") <- tree$tip.label - attr(result, "visited") <- visited class(result) <- "multiPhylo" if(add_taxa) result <- addTaxa(result, attr(data, "duplicated")) + attr(result, "visited") <- visited return(result) } - diff --git a/R/baseFreq.R b/R/baseFreq.R index ebe332e8..8e8629d7 100644 --- a/R/baseFreq.R +++ b/R/baseFreq.R @@ -3,6 +3,8 @@ #' \code{baseFreq} computes the frequencies (absolute or relative) of the states #' from a sample of sequences. #' \code{glance} computes some useful information about the alignment. +#' \code{composition\_test} computes a \eqn{\chi^2}-test testing if the state +#' composition for a species differs. #' #' @param obj,x as object of class phyDat #' @param freq logical, if 'TRUE', frequencies or counts are returned otherwise @@ -28,6 +30,7 @@ #' baseFreq(chloroplast) #' glance(Laurasiatherian) #' glance(chloroplast) +#' composition_test(Laurasiatherian)[1:10,] #' @rdname baseFreq #' @export baseFreq <- function(obj, freq=FALSE, all=FALSE, drop.unused.levels = FALSE){ @@ -74,3 +77,24 @@ glance.phyDat <- function (x, ...){ parsimony_informative_sites=parsimony_informative_sites, const_sites=const_site(x)) } + + +#' @rdname baseFreq +#' @importFrom stats chisq.test +#' @export +composition_test <- function(obj){ + stopifnot(inherits(obj,"phyDat")) + labels <- attr(obj, "allLevels") + levs <- attr(obj, "levels") + weight <- attr(obj,"weight") + n <- length(obj) + ALL <- baseFreq(obj, freq=TRUE) + res <- matrix(0, n, 3, dimnames = list(names(obj), + c("statistic", "parameter df", "p-value"))) + for(i in seq_len(n)){ + tmp <- baseFreq(obj[i], freq=TRUE) + res[i, ] <- unlist(chisq.test(rbind(ALL-tmp, tmp))[1:3]) + } + res +} + diff --git a/R/bootstrap.R b/R/bootstrap.R index d0dace12..b0d6ac67 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -83,12 +83,15 @@ bootstrap.pml <- function(x, bs = 100, trees = TRUE, multicore = FALSE, if(.Platform$OS.type=="windows") multicore <- FALSE if (multicore && is.null(mc.cores)) mc.cores <- min(detectCores()-1L, 4L) if(multicore && mc.cores < 2L) multicore <- FALSE - if(is_rooted(x$tree)){ + if(is.rooted(x$tree)){ if(is.ultrametric(x$tree)) method <- "ultrametric" else method <- "tipdated" + optRooted <- TRUE + } + else { + method <- "unrooted" + optRooted <- FALSE } - else method <- "unrooted" - extras <- match.call(expand.dots = FALSE)$... rearr <- c("optNni", "rearrangement") tmp <- pmatch(names(extras), rearr) @@ -106,7 +109,9 @@ bootstrap.pml <- function(x, bs = 100, trees = TRUE, multicore = FALSE, tmp <- pmatch("optRooted", names(extras)) if(!is.na(tmp)){ is_ultrametric <- extras$optRooted + optRooted <- extras$optRooted } + else if(optRooted) extras <- append(extras, list(optRooted=TRUE)) data <- x$data weight <- attr(data, "weight") v <- rep(seq_along(weight), weight) @@ -128,7 +133,8 @@ bootstrap.pml <- function(x, bs = 100, trees = TRUE, multicore = FALSE, tip.dates = tip.dates) fit <- update(fit, tree = tree) } - fit <- optim.pml(fit, ...) +# fit <- optim.pml(fit, ...) + fit <- do.call(optim.pml, append(list(object=fit), extras)) if (trees) { tree <- fit$tree return(tree) @@ -214,149 +220,6 @@ checkLabels <- function(tree, tip) { } -#' Plotting trees with bootstrap values -#' -#' \code{plotBS} plots a phylogenetic tree with the bootstrap values assigned -#' to the (internal) edges. It can also used to assign bootstrap values to a -#' phylogenetic tree. -#' -#' \code{plotBS} can either assign the classical Felsenstein’s bootstrap -#' proportions (FBP) (Felsenstein (1985), Hendy & Penny (1985)) or the -#' transfer bootstrap expectation (TBE) of Lemoine et al. (2018). Using the -#' option \code{type=="n"} just assigns the bootstrap values and return the tree -#' without plotting it. -#' -#' @param tree The tree on which edges the bootstrap values are plotted. -#' @param BStrees a list of trees (object of class "multiPhylo"). -#' @param type the type of tree to plot, one of "phylogram", "cladogram", "fan", -#' "unrooted", "radial" or "none". If type is "none" the tree is returned with -#' the bootstrap values assigned to the node labels. -#' @param method either "FBP" the classical bootstrap (default) or "TBE" -#' (transfer bootstrap) -#' @param bs.col color of bootstrap support labels. -#' @param bs.adj one or two numeric values specifying the horizontal and -#' vertical justification of the bootstrap labels. -#' @param digits integer indicating the number of decimal places. -#' @param p only plot support values higher than this percentage number -#' (default is 0). -#' @param \dots further parameters used by \code{plot.phylo}. -#' @param frame a character string specifying the kind of frame to be printed -#' around the bootstrap values. This must be one of "none" (the default), -#' "rect" or "circle". -#' @return \code{plotBS} returns silently a tree, i.e. an object of class -#' \code{phylo} with the bootstrap values as node labels. The argument -#' \code{BStrees} is optional and if not supplied the labels supplied -#' in the \code{node.label} slot will be used. -#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{transferBootstrap}}, \code{\link{plot.phylo}}, -#' \code{\link{maxCladeCred}}, \code{\link{nodelabels}}, -#' \code{\link{consensus}}, \code{\link{consensusNet}} -#' @references Felsenstein J. (1985) Confidence limits on phylogenies. An -#' approach using the bootstrap. \emph{Evolution} \bold{39}, 783--791 -#' -#' Lemoine, F., Entfellner, J. B. D., Wilkinson, E., Correia, D., Felipe, M. D., -#' De Oliveira, T., & Gascuel, O. (2018). Renewing Felsenstein’s phylogenetic -#' bootstrap in the era of big data. \emph{Nature}, \bold{556(7702)}, 452--456. -#' -#' Penny D. and Hendy M.D. (1985) Testing methods evolutionary tree -#' construction. \emph{Cladistics} \bold{1}, 266--278 -#' -#' Penny D. and Hendy M.D. (1986) Estimating the reliability of evolutionary -#' trees. \emph{Molecular Biology and Evolution} \bold{3}, 403--417 -#' @examples -#' fdir <- system.file("extdata/trees", package = "phangorn") -#' # RAxML best-known tree with bipartition support (from previous analysis) -#' raxml.tree <- read.tree(file.path(fdir,"RAxML_bipartitions.woodmouse")) -#' # RAxML bootstrap trees (from previous analysis) -#' raxml.bootstrap <- read.tree(file.path(fdir,"RAxML_bootstrap.woodmouse")) -#' par(mfrow=c(1,2)) -#' plotBS(raxml.tree, raxml.bootstrap, "p") -#' plotBS(raxml.tree, raxml.bootstrap, "p", "TBE") -#' @export -plotBS <- function(tree, BStrees, type = "phylogram", - method="FBP", bs.col = "black", - bs.adj = NULL, digits=3, p = 0, frame = "none", ...) { - type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", - "radial", "none")) - method <- match.arg(method, c("FBP", "TBE")) - if (hasArg(BStrees)) { - if(method=="FBP"){ - BStrees <- .uncompressTipLabel(BStrees) # check if needed - if (any(is_rooted(BStrees))) BStrees <- unroot(BStrees) - x <- prop.clades(tree, BStrees) - x <- (x / length(BStrees)) * 100 - tree$node.label <- x - } - else { - tree <- transferBootstrap(tree, BStrees) - x <- tree$node.label - } - } - else { - if (is.null(tree$node.label)) stop("You need to supply 'trees' or the tree needs support-values as node.label") - x <- tree$node.label - } - if(type=="none") return( tree ) - plot(tree, type = type, ...) - - label <- c(rep(0, length(tree$tip.label)), x) - ind <- get("last_plot.phylo", envir = .PlotPhyloEnv)$edge[ ,2 ] - if (type == "phylogram" | type == "cladogram") { - root <- getRoot(tree) - label <- c(rep(0, length(tree$tip.label)), x) - label[root] <- 0 - ind <- which(label > p) - if (is.null(bs.adj)) { - bs.adj <- c(1, 1) - } - if (length(ind) > 0) { - if(is.numeric(label)) label <- round(label, digits = digits) - nodelabels( - text = label[ind], node = ind, - frame = frame, col = bs.col, adj = bs.adj, ... - ) - } - } - else { - if (is.null(bs.adj)) { - bs.adj <- c(0.5, 0.5) - } - ind2 <- which(label[ind] > p) - if (length(ind2 > 0)) { - if(is.numeric(label)) label <- round(label, digits = digits) - edgelabels(label[ind][ind2], ind2, - frame = frame, - col = bs.col, adj = bs.adj, ... - ) - } - } - invisible(tree) -} - - - -is_rooted <- function(phy) UseMethod("is_rooted") - -.is_rooted_ape <- function(phy, ntips) -{ - if (!is.null(phy$root.edge)) return(TRUE) - if (tabulate(phy$edge[, 1])[ntips + 1] > 2) FALSE else TRUE -} - -is_rooted.phylo <- function (phy) - .is_rooted_ape(phy, length(phy$tip.label)) - -is_rooted.multiPhylo <- function(phy) -{ - labs <- attr(phy, "TipLabel") - class(phy) <- NULL - if (is.null(labs)) sapply(phy, is_rooted.phylo) - else sapply(phy, .is_rooted_ape, ntips = length(labs)) -} - - - - cladeMatrix <- function(x, rooted = FALSE) { if (!rooted) x <- unroot(x) pp <- prop.part(x) diff --git a/R/candidate_tree.R b/R/candidate_tree.R index dfa838ec..6d186b4c 100644 --- a/R/candidate_tree.R +++ b/R/candidate_tree.R @@ -24,6 +24,7 @@ minEdge <- function(tree, tau=1e-8, enforce_ultrametric=FALSE){ #' @rdname phangorn-internal +#' @importFrom stats cor #' @export candidate_tree <- function(x, method=c("unrooted", "ultrametric", "tipdated"), eps = 1e-8, tip.dates=NULL, ...){ @@ -35,7 +36,8 @@ candidate_tree <- function(x, method=c("unrooted", "ultrametric", "tipdated"), enforce_ultrametric <- TRUE } if(method=="unrooted"){ - tree <- pratchet(x, maxit=10L, trace=0, perturbation = "ratchet") + tree <- pratchet(x, maxit=5L, trace=0, perturbation = "stochastic", + all=FALSE) tree <- multi2di(tree) tree <- unroot(tree) tree <- acctran(tree, x) @@ -45,11 +47,27 @@ candidate_tree <- function(x, method=c("unrooted", "ultrametric", "tipdated"), if(is.null(tip.dates)) stop("Argument tip.dates is missing!") if(is.null(names(tip.dates))) names(tip.dates) <- names(x) dm <- dist.ml(x, ...) - tree <- fastme.ols(dm) - tree <- rtt(tree, tip.dates[tree$tip.label]) - tree <- nnls.tree(dm, tree, method = "tipdated", - tip.dates=tip.dates[tree$tip.label]) - tree$tip.dates <- tip.dates[tree$tip.label] + tree <- supgma(dm, tip.dates) } minEdge(tree, tau=eps, enforce_ultrametric=enforce_ultrametric) } + + +# like is.ultrametric +check_tip_dates <- function(tree, tip.dates){ + tip.dates <- tip.dates[tree$tip.label] + nh <- node.depth.edgelength(tree)[seq_along(tree$tip.label)] + isTRUE(all.equal(cor(tip.dates,nh), 1)) +} + + +proper_tree <- function(x, tree, method=c("ultrametric", "tipdated"), + tip.dates=NULL, eps = 1e-8, ...){ + method <- match.arg(method, c("ultrametric", "tipdated")) + if(!is.null(tree$edge.length)){ + if(method=="ultrametric" && is.ultrametric(tree)) return(tree) + if(method=="tipdated" && check_tip_dates(tree, tip.dates)) return(tree) + } + dm <- dist.ml(x, ...) + nnls.tree(dm, tree, method) +} diff --git a/R/codon.R b/R/codon.R index 2495c384..64769a3e 100644 --- a/R/codon.R +++ b/R/codon.R @@ -1,7 +1,8 @@ #' Translate nucleic acid sequences into codons #' #' The function transforms \code{dna2codon} DNA sequences to codon sequences, -#' \code{codon2dna} transform the other way. +#' \code{codon2dna} transform the other way. \code{dna2codon} translates +#' nucleotide to amino acids using the function \code{\link{trans}}. #' #' The following genetic codes are described here. The number preceding each #' corresponds to the code argument. @@ -57,9 +58,8 @@ #' @rdname dna2codon #' @export dna2codon <- function(x, codonstart=1, code=1, ambiguity="---", ...){ - if(!inherits(x, "phyDat"))stop("x needs to be of class phyDat!") - if(attr(x, "type")=="AA")stop("x needs to be a nucleotide sequence!") - + if(!inherits(x, "phyDat"))stop("x must be of class phyDat") + if(attr(x, "type")!="DNA")stop("x must be a nucleotide sequence!") if(codonstart>1){ del <- -seq_len(codonstart) x <- subset(x, select=del, site.pattern=FALSE) @@ -76,11 +76,21 @@ dna2codon <- function(x, codonstart=1, code=1, ambiguity="---", ...){ #' @rdname dna2codon #' @export codon2dna <- function(x){ - if(!inherits(x, "phyDat"))stop("x needs to be of class phyDat!") + stopifnot(inherits(x, "phyDat")) phyDat.DNA(as.character(x)) } +#' @rdname dna2codon +#' @export +dna2aa <- function(x, codonstart=1, code=1){ + stopifnot(inherits(x, "phyDat")) + if(attr(x, "type")!="DNA")stop("x must be a nucleotide sequence!") + dna <- as.DNAbin(x) + aa <- as.phyDat(trans(dna, code=code, codonstart=codonstart)) +} + + synonymous_subs <- function(code=1, stop.codon=FALSE){ tmp <- .CODON[, as.character(code)] label <- rownames(.CODON) diff --git a/R/codonTest.R b/R/codonTest.R index 7303c2d4..7006fe0e 100644 --- a/R/codonTest.R +++ b/R/codonTest.R @@ -133,7 +133,7 @@ codonTest <- function(tree, object, model = c("M0", "M1a", "M2a"), } -# tidy codon +#' @export glance.pml <- function(x, ...) { res <- data.frame(logLik = x$logLik, df = x$df, @@ -142,7 +142,7 @@ glance.pml <- function(x, ...) { res } - +#' @export glance.pmlMix <- function(x, ...) { nr <- attr(x$fits[[1]]$data, "nr") res <- data.frame(logLik = x$logLik, diff --git a/R/consensusNet.R b/R/consensusNet.R index 19ff59f5..6bf32f50 100644 --- a/R/consensusNet.R +++ b/R/consensusNet.R @@ -27,7 +27,7 @@ #' bs <- bootstrap.phyDat(Laurasiatherian, FUN = function(x)nj(dist.hamming(x)), #' bs=50) #' cnet <- consensusNet(bs, .3) -#' plot(cnet) +#' plot(cnet, angle=-60, direction="axial") #' \dontrun{ #' library(rgl) #' open3d() @@ -51,7 +51,8 @@ consensusNet <- function(obj, prob = 0.3, ...) { attr(spl, "confidences") <- (w / l)[ind] # attr(spl, "weights") = w[ind] res <- as.networx(spl) - res$edge.labels <- as.character(res$edge.length / l * 100) - res$edge.labels[res$edge[, 2] <= length(res$tip.label)] <- "" + res$edge.labels <- res$edge.length / l * 100 + #as.character(res$edge.length / l * 100) + res$edge.labels[res$edge[, 2] <= length(res$tip.label)] <- NA_real_ #"" reorder(res) } diff --git a/R/discrete.gamma.R b/R/discrete.gamma.R index f4b5a204..25422071 100644 --- a/R/discrete.gamma.R +++ b/R/discrete.gamma.R @@ -31,7 +31,7 @@ #' @param \dots Further arguments passed to or from other methods. #' @return \code{discrete.gamma} returns a matrix. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{pml.fit}, \link{stepfun}, link{pgamma}, link{pbeta}}, +#' @seealso \code{\link{pml.fit}, \link{stepfun}, \link{pgamma}, \link{pbeta}}, #' @examples #' discrete.gamma(1, 4) #' diff --git a/R/distSeq.R b/R/distSeq.R index f6249912..de1c0cdf 100644 --- a/R/distSeq.R +++ b/R/distSeq.R @@ -63,8 +63,7 @@ #' @export dist.hamming <- function(x, ratio = TRUE, exclude = "none"){ if(inherits(x, "DNAbin") | inherits(x, "AAbin")) x <- as.phyDat(x) - if (!inherits(x, "phyDat")) - stop("x must be of class phyDat") + if (!inherits(x, "phyDat")) stop("x must be of class phyDat") l <- length(x) contrast <- attr(x, "contrast") @@ -230,8 +229,7 @@ dist.ml <- function(x, model = "JC69", exclude = "none", bf = NULL, Q = NULL, dist.logDet <- function(x) { if(inherits(x, "DNAbin") | inherits(x, "AAbin")) x <- as.phyDat(x) - if (!inherits(x, "phyDat")) - stop("x must be of class phyDat") + if (!inherits(x, "phyDat")) stop("x must be of class phyDat") weight <- attr(x, "weight") contrast <- attr(x, "contrast") r <- attr(x, "nc") diff --git a/R/distTree.R b/R/distTree.R index 9ec155eb..2774bb19 100644 --- a/R/distTree.R +++ b/R/distTree.R @@ -1,48 +1,3 @@ -#' UPGMA and WPGMA -#' -#' UPGMA and WPGMA clustering. Just a wrapper function around -#' \code{\link[stats]{hclust}}. -#' -#' -#' @param D A distance matrix. -#' @param method The agglomeration method to be used. This should be (an -#' unambiguous abbreviation of) one of "ward", "single", "complete", "average", -#' "mcquitty", "median" or "centroid". The default is "average". -#' @param \dots Further arguments passed to or from other methods. -#' @return A phylogenetic tree of class \code{phylo}. -#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{hclust}}, \code{\link{dist.hamming}}, \code{\link{NJ}}, -#' \code{\link{as.phylo}}, \code{\link{fastme}}, \code{\link{nnls.tree}} -#' @keywords cluster -#' @examples -#' -#' data(Laurasiatherian) -#' dm <- dist.ml(Laurasiatherian) -#' tree <- upgma(dm) -#' plot(tree) -#' -#' @rdname upgma -#' @export -"upgma" <- function(D, method = "average", ...) { - DD <- as.dist(D) - hc <- hclust(DD, method = method, ...) - result <- as.phylo(hc) - result <- reorder(result, "postorder") - result -} - - -#' @rdname upgma -#' @export -"wpgma" <- function(D, method = "mcquitty", ...) { - DD <- as.dist(D) - hc <- hclust(DD, method = method, ...) - result <- as.phylo(hc) - result <- reorder(result, "postorder") - result -} - - #' Neighbor-Joining #' #' This function performs the neighbor-joining tree estimation of Saitou and @@ -157,7 +112,12 @@ UNJ <- function(x){ #' Weighted least squares is used with weights w, i.e., sum(w * e^2) is #' minimized. #' @param balanced use weights as in balanced fastME -#' @param tip.dates a vector of sampling times associated to the tips of tree. +#' @param tip.dates a named vector of sampling times associated to the tips of +#' the tree. +#' @param calibration a named vector of calibration times associated to nodes of +#' the tree. +#' @param eps minimum edge length (default s 1e-8). +## @param strict strict calibration. #' @return \code{nnls.tree} return a tree, i.e. an object of class #' \code{phylo}. \code{designTree} and \code{designSplits} a matrix, possibly #' sparse. @@ -181,26 +141,27 @@ UNJ <- function(x){ #' @rdname designTree #' @export designTree <- function(tree, method = "unrooted", sparse = FALSE, - tip.dates=NULL, ...) { - if (!is.na(pmatch(method, "all"))) - method <- "unrooted" - METHOD <- c("unrooted", "rooted", "tipdated") - method <- pmatch(method, METHOD) - if (is.na(method)) stop("invalid method") - if (method == -1) stop("ambiguous method") - if (!is.rooted(tree) & method == 2) stop("tree has to be rooted") - if (method == 1) { - X <- designUnrooted(tree, ...) - if (sparse) X <- Matrix(X) - } - if (method == 2) X <- designUltra(tree, sparse = sparse, ...) - if(method == 3) X <- designTipDated(tree, tip.dates=tip.dates, sparse=sparse, - ...) + tip.dates=NULL, calibration=NULL, ...) { # , strict=TRUE + method <- match.arg(method, + c("unrooted", "ultrametric", "rooted", "tipdated")) + if(method == "rooted") method <- "ultrametric" + if(has.singles(tree)) tree <- collapse.singles(tree) + #if (!is.na(pmatch(method, "all"))) + # method <- "unrooted" + #METHOD <- c("unrooted", "rooted", "tipdated") + #method <- pmatch(method, METHOD) + #if (is.na(method)) stop("invalid method") + #if (method == -1) stop("ambiguous method") + #if (!is.rooted(tree) & method == 2) stop("tree has to be rooted") + if(method == "unrooted") X <- designUnrooted(tree, sparse = sparse, ...) + if(method == "ultrametric") X <- designUltra(tree, sparse = sparse, ...) + if(method == "tipdated") X <- designTipDated(tree, tip.dates=tip.dates, + sparse=sparse, ...) X } -designUnrooted <- function(tree, order = NULL) { +designUnrooted <- function(tree, sparse=FALSE, order = NULL) { if (inherits(tree, "phylo")) { if (is.rooted(tree)) tree <- unroot(tree) tree <- reorder(tree, "postorder") @@ -225,13 +186,15 @@ designUnrooted <- function(tree, order = NULL) { } if (inherits(tree, "phylo")) colnames(res) <- paste(tree$edge[, 1], tree$edge[, 2], sep = "<->") + if(sparse) res <- Matrix(res, sparse=TRUE) res } -designUltra <- function(tree, sparse = TRUE) { +designUltra <- function(tree, sparse = TRUE, calibration=NULL) { if (is.null(attr(tree, "order")) || attr(tree, "order") != "postorder") tree <- reorder(tree, "postorder") +# stopifnot( !(!is.null(calibration) && is.null(tree$node.label))) leri <- allChildren(tree) bp <- bip(tree) n <- length(tree$tip.label) @@ -341,7 +304,11 @@ designUnrooted2 <- function(tree, sparse = TRUE) { } -designTipDated <- function(tree, tip.dates, sparse = TRUE){ +designTipDated <- function(tree, tip.dates, sparse=TRUE){ + #, strict=TRUE + #if(!is.numeric(tip.dates)) browser() + #if(!length(tip.dates) >= Ntip(tree)) browser() + stopifnot(is.numeric(tip.dates), length(tip.dates) >= Ntip(tree)) nTip <- Ntip(tree) tmp <- function(n){ x1 <- rep(seq_len(n), each=n) @@ -352,7 +319,7 @@ designTipDated <- function(tree, tip.dates, sparse = TRUE){ tip.dates <- tip.dates - max(tip.dates) x <- tmp(nTip) %*% tip.dates nodes <- integer(tree$Nnode) - X <- designUltra(tree) + X <- designUltra(tree, sparse=sparse) nodes <- attr(X, "nodes") X <- cbind(X, x) colnames(X) <- c(nodes, -1) @@ -361,11 +328,56 @@ designTipDated <- function(tree, tip.dates, sparse = TRUE){ } +designCalibrated <- function(tree, sparse=TRUE, calibration=NULL){ + #, tip.dates=NULL, strict=TRUE + #stopifnot(is.numeric(tip.dates), length(tip.dates) >= Ntip(tree)) + nTip <- Ntip(tree) + #if(!is.null(tree$node.label)) + cname <- tree$node.label +# nodes <- integer(tree$Nnode) + X <- designUltra(tree, sparse=sparse) + #if(!is.null(tree$node.label)) + colnames(X) <- cname + x <- X[, names(calibration), drop=FALSE] %*% calibration + X <- cbind(X[,-match(names(calibration), cname)], rate=x) +# nodes <- attr(X, "nodes") +# X <- cbind(X, x) +# colnames(X) <- c(nodes, -1) +# attr(X, "nodes") <- nodes + X +} + + +designConstrained <- function(tree, sparse=TRUE, tip.dates=NULL, + calibration=NULL){ + stopifnot(is.numeric(tip.dates), length(tip.dates) >= Ntip(tree)) + X <- designUltra(tree, sparse=sparse) + nTip <- Ntip(tree) + # designTipDated + if(!is.null(tip.dates)){ + tmp <- function(n){ + x1 <- rep(seq_len(n), each=n) + x2 <- rep(seq_len(n), n) + ind <- x1 < x2 + sparseMatrix(i = rep(seq_len(sum(ind)), 2), j = c(x1[ind], x2[ind])) + } + } + if(!is.null(calibration)){ + cname <- tree$node.label + colnames(X) <- cname + x <- X[, names(calibration), drop=FALSE] %*% calibration + X <- cbind(X[,-match(names(calibration), cname)], rate=x) + } + + +} + #' @rdname designTree #' @export nnls.tree <- function(dm, tree, method=c("unrooted", "ultrametric", "tipdated"), rooted=NULL, trace=1, weight=NULL, balanced=FALSE, tip.dates=NULL) { method <- match.arg(method, c("unrooted", "ultrametric", "tipdated")) + if(has.singles(tree)) tree <- collapse.singles(tree) if (is.rooted(tree) && method == "unrooted") tree <- unroot(tree) tree <- reorder(tree, "postorder") if (balanced) { @@ -421,7 +433,10 @@ nnls.tree <- function(dm, tree, method=c("unrooted", "ultrametric", "tipdated"), RSS <- sum((y - (X %*% betahattmp))^2) if (trace > 1) print(paste("RSS:", RSS)) attr(tree, "RSS") <- RSS - if(method=="tipdated") betahat <- betahat / rate + if(method=="tipdated"){ + betahat <- betahat / rate + attr(tree, "rate") <- rate + } tree$edge.length <- betahat return(tree) } @@ -494,7 +509,7 @@ nnls.phylo <- function(x, dm, method = "unrooted", trace = 0, ...) { #' @rdname designTree #' @export -nnls.splits <- function(x, dm, trace = 0) { +nnls.splits <- function(x, dm, trace = 0, eps = 1e-8) { labels <- attr(x, "labels") dm <- as.matrix(dm) k <- dim(dm)[1] @@ -517,23 +532,24 @@ nnls.splits <- function(x, dm, trace = 0) { dvec <- crossprod(X, y) betahat <- as.vector(solve(Dmat, dvec)) - if (!any(betahat < 0)) { - RSS <- sum((y - (X %*% betahat))^2) - if (trace > 1) print(paste("RSS:", RSS)) - attr(x, "RSS") <- RSS - attr(x, "weights") <- betahat - return(x) - } - n <- dim(X)[2] +# if (!any(betahat < 0)) { +# RSS <- sum((y - (X %*% betahat))^2) +# if (trace > 1) print(paste("RSS:", RSS)) +# attr(x, "RSS") <- RSS +# attr(x, "weights") <- betahat +# return(x) +# } int <- lengths(x) - - # quadratic programing - Amat <- matrix(1, 1, n) - Aind <- matrix(0L, 2L, n) - Aind[1, ] <- 1L - Aind[2, ] <- as.integer(1L:n) - betahat <- quadprog::solve.QP.compact(as.matrix(Dmat), as.vector(dvec), Amat, - Aind)$sol + if (any(betahat < 0)) { + n <- dim(X)[2] + # quadratic programing + Amat <- matrix(1, 1, n) + Aind <- matrix(0L, 2L, n) + Aind[1, ] <- 1L + Aind[2, ] <- as.integer(1L:n) + betahat <- quadprog::solve.QP.compact(as.matrix(Dmat), as.vector(dvec), + Amat, Aind)$sol + } RSS <- sum((y - (X %*% betahat))^2) ind <- (betahat > 1e-8) | int == 1 x <- x[ind] @@ -546,10 +562,10 @@ nnls.splits <- function(x, dm, trace = 0) { #' @rdname designTree #' @export -nnls.networx <- function(x, dm) { +nnls.networx <- function(x, dm, eps = 1e-8) { # spl <- attr(x, "splits") spl <- x$splits - spl2 <- nnls.splits(spl, dm) + spl2 <- nnls.splits(spl, dm, eps=eps) weight <- attr(spl, "weight") weight[] <- 0 weight[match(spl2, spl)] <- attr(spl2, "weight") diff --git a/R/draw_CI.R b/R/draw_CI.R index 2ba3163d..06b89042 100644 --- a/R/draw_CI.R +++ b/R/draw_CI.R @@ -1,19 +1,22 @@ edge_length_matrix <- function(tree, trees, rooted=TRUE){ - if(!inherits(trees, "multiPhylo")) stop("Trees must be of class multiPhylo!") + if(!inherits(trees, "multiPhylo")) stop("trees must be of class multiPhylo") + if(inherits(tree, "networx")) rooted <- FALSE trees <- .uncompressTipLabel(trees) |> .compressTipLabel(ref=tree$tip.label) if(!rooted){ trees <- unroot(trees) tree <- unroot(tree) } else{ - if(!is_rooted(tree) || any(!is_rooted(trees))) stop("All trees need to be rooted!") + if(any(!is.rooted(trees))) stop("All trees need to be rooted!") } fun <- function(x){ el <- numeric(max(x$edge)) el[x$edge[,2]] <- x$edge.length el } - bp <- bip(tree) + if(inherits(tree, "networx")) bp <- tree$splits + else bp <- bip(tree) + if(!rooted) bp <- SHORTwise(bp) m <- length(bp) res <- matrix(NA_real_, length(trees), m) @@ -35,14 +38,15 @@ edge_length_matrix <- function(tree, trees, rooted=TRUE){ ##' @title Assign and compute edge lengths from a sample of trees ##' @description This command can infer some average edge lengths and assign ##' them from a (bootstrap/MCMC) sample. -##' @param tree tree where edge lengths are assigned to. +##' @param tree a phylogenetic tree or splitnetwork where edge lengths are +##' assigned to. ##' @param trees an object of class multiPhylo, where the average for the edges ##' is computed from. ##' @param fun a function to compute the average (default is median). ##' @param rooted rooted logical, if FALSE edge lengths is a function of the ##' observed splits, if TRUE edge lengths are estimated from height for the ##' observed clades. -##' @return NULL +##' @return The tree with newly assigned edge length. ##' @author Klaus Schliep ##' @importFrom graphics legend rect ##' @examples @@ -59,12 +63,18 @@ edge_length_matrix <- function(tree, trees, rooted=TRUE){ ##' @keywords aplot ##' @export add_edge_length <- function(tree, trees, fun=\(x)median(na.omit(x)), - rooted=TRUE){ + rooted=all(is.rooted(trees))){ if(!rooted) tree <- unroot(tree) X <- edge_length_matrix(tree, trees, rooted) nh <- apply(X, 2, fun) - if(rooted) tree$edge.length <- nh[tree$edge[,1]] - nh[tree$edge[,2]] - else tree$edge.length <- nh[tree$edge[,2]] + if(inherits(tree, "networx")){ + tree$edge.length <- nh[tree$splitIndex] + attr(tree$splits, "weights") <- nh + } + else{ + if(rooted) tree$edge.length <- nh[tree$edge[,1]] - nh[tree$edge[,2]] + else tree$edge.length <- nh[tree$edge[,2]] + } tree } @@ -84,7 +94,7 @@ add_edge_length <- function(tree, trees, fun=\(x)median(na.omit(x)), ##' @param \dots arguments passed to other functions, \code{\link{legend}} or ##' \code{\link{bxp}}. ##' @details All trees should to be rooted, either ultrametric or tip dated. -##' @return NULL +##' @return Nothing. Function is called for adding to a plot. ##' @author Emmanuel Paradis, Santiago Claramunt, Joseph Brown, Klaus Schliep ##' @importFrom graphics legend rect bxp boxplot ##' @importFrom stats median @@ -100,7 +110,7 @@ add_edge_length <- function(tree, trees, fun=\(x)median(na.omit(x)), ##' plot(tree, direction="downwards") ##' add_boxplot(tree, trees, boxwex=.7) ##' @seealso \code{\link{plot.phylo}}, \code{\link{plotBS}}, -##' \code{\link{add_edge_length}} +##' \code{\link{add_edge_length}}, \code{\link{maxCladeCred}} ##' @keywords aplot ##' @rdname add_ci ##' @export @@ -110,8 +120,8 @@ add_ci <- function(tree, trees, col95 = "#FF00004D", col50 = "#0000FF4D", lastPP <- get("last_plot.phylo", envir = ape::.PlotPhyloEnv) direction <- lastPP$direction trees <- .uncompressTipLabel(trees) - if(!is_rooted(tree) || !all(is_rooted(trees))) stop("All trees need to be rooted!") - X <- edge_length_matrix(tree, trees, rooted=TRUE)[, -(seq_along(Ntip(tree)))] + if(!is.rooted(tree) || !all(is.rooted(trees))) stop("All trees need to be rooted!") + X <- edge_length_matrix(tree, trees, rooted=TRUE)[, -(seq_len(Ntip(tree)))] CI <- apply(X, 2, FUN=\(x)quantile(na.omit(x), probs=c(.025,.25,.75,.975))) horizontal <- FALSE if(direction=="rightwards" || direction=="leftwards"){ diff --git a/R/fitch64.R b/R/fitch64.R index ffa8ee74..95604abb 100644 --- a/R/fitch64.R +++ b/R/fitch64.R @@ -152,7 +152,7 @@ fitch_spr <- function (tree, f, trace=0L) } -indexNNI_fitch <- function(tree, offset=2L*Ntip(tree)) { +indexNNI_fitch <- function(tree, offset=2L*Ntip(tree), rooted=is.rooted(tree)) { offset <- as.integer(offset) parent <- tree$edge[, 1] child <- tree$edge[, 2] @@ -169,6 +169,15 @@ indexNNI_fitch <- function(tree, offset=2L*Ntip(tree)) { # e-----f d is closest to root, f is root from subtree a,b,c # / \ # b c c(a,b,c,d,e,f) + + # d d is f + offset, if offset > 0 + # / + # f + # / \ + # e \ + # / \ c + # a b + k <- 1 for (i in ind) { f <- pvector[i] @@ -179,6 +188,12 @@ indexNNI_fitch <- function(tree, offset=2L*Ntip(tree)) { if (pvector[f]){ cd <- c(cd, f + offset) } + if(offset < 0L){ + tmp <- pvector[f] + if(tmp==0L) tmp <- f + # think about this more + cd[2] <- tmp + } # else if(rooted) cd <- c(cd, NA_integer_) # else if(!rooted) ef <- c(i, cd[2]) # else cd[2] <- f @@ -246,7 +261,7 @@ optim.fitch <- function(tree, data, trace = 1, rearrangements = "NNI", ...) { } if (is.null(attr(tree, "order")) || attr(tree, "order") != "postorder") tree <- reorder(tree, "postorder") - if (class(data)[1] != "phyDat") stop("data must be of class phyDat") + if (!inherits(data, "phyDat")) stop("data must be of class phyDat") rt <- FALSE diff --git a/R/gap_as_state.R b/R/gap_as_state.R new file mode 100644 index 00000000..c6917a11 --- /dev/null +++ b/R/gap_as_state.R @@ -0,0 +1,101 @@ +#' Treat gaps as a state +#' +#' The function \code{gap_as_state} changes the contrast of an phyDat object to +#' treat as its own state. Internally \code{phyDat} are stored similar to a +#' \code{factor} objects and only the contrast matrix and some attributes +#' change. +#' +#' @param obj An object of class phyDat. +#' @param gap a character which codes for the gaps (default is "-"). +#' @param ambiguous a character which codes for the ambiguous state +#' @return The functions return an object of class \code{phyDat}. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link{phyDat}}, \code{\link{ltg2amb}}, \code{\link{latag2n}}, +#' \code{\link{ancestral.pml}}, \code{\link{gap_as_state}} +#' @keywords cluster +#' @examples +#' data(Laurasiatherian) +#' tmp <- gap_as_state(Laurasiatherian) +#' contr <- attr(tmp, "contrast") +#' rownames(contr) <- attr(tmp, "allLevels") +#' contr +#' @rdname gap_as_state +#' @export +gap_as_state <- function(obj, gap="-", ambiguous="?"){ + if(has_gap_state(obj)) return(obj) +# if(!is.null(attr(obj, "gap_is_state")) & isTRUE(attr(obj, "gap_is_state"))) +# return(obj) + contrast <- cbind(attr(obj, "contrast"), gap = 0) + levels <- c(attr(obj, "levels"), gap) + colnames(contrast) <- levels + rownames(contrast) <- attr(obj, "allLevels") + contrast[gap, ] <- 0 + contrast[gap, gap] <- 1 + # todo check for ambiguous + contrast[ambiguous, "-"] <- 1 + rownames(contrast) <- NULL + attr(obj, "levels") <- levels + attr(obj, "nc") <- attr(obj, "nc") + 1L + attr(obj, "contrast") <- contrast +# attr(obj, "gap_is_state") <- TRUE + obj +} + + +#' @rdname gap_as_state +#' @export +gap_as_ambiguous <- function(obj, gap="-"){ + if(!has_gap_state(obj)) return(obj) +# if(is.null(attr(obj, "gap_is_state")) | !isTRUE(attr(obj, "gap_is_state"))) +# return(obj) + contrast <- attr(obj, "contrast") + levels <- attr(obj, "levels") +# colnames(contrast) <- levels + rownames(contrast) <- attr(obj, "allLevels") + contrast[gap, ] <- 1 + rownames(contrast) <- NULL + ind <- match(gap, levels) + contrast <- contrast[,-ind] + attr(obj, "levels") <- levels[-ind] + attr(obj, "contrast") <- contrast + attr(obj, "nc") <- attr(obj, "nc") - 1L +# attr(obj, "gap_is_state") <- FALSE + obj +} + + +#' @rdname gap_as_state +#' @export +has_gap_state <- function(obj){ + type <- attr(obj, "type") + if(type=="DNA" && attr(obj, "nc")==5) return(TRUE) + if(type=="AA" && attr(obj, "nc")==21) return(TRUE) + FALSE +} + + +add_gap_Q_AA <- function(Q, rate_gap=0.1){ + res <- matrix(0, 20, 20) + res[lower.tri(res)] <- Q + res <- cbind(rbind(res,rate_gap), 0) + res <- res[lower.tri(res)] + res +} + + +add_gap_bf_AA <- function(bf, gap=.01){ + bf <- c(bf, gap) + bf <- bf / sum(bf) + bf +} + + +remove_similar <- function(x, k=3, index=FALSE){ + dm <- dist.hamming(x, FALSE) + dm <- as.matrix(dm) + ind_dist <- which(dm < (k+1), arr.ind = TRUE) + ind_dist <- ind_dist[ind_dist[,1] < ind_dist[,2], ] + dist_i <- unique(ind_dist[, 2]) + if(index) return(dist_i) + x[-dist_i, ] +} diff --git a/R/hadamard.R b/R/hadamard.R index 1be7830c..68efc9b1 100644 --- a/R/hadamard.R +++ b/R/hadamard.R @@ -209,7 +209,7 @@ distanceHadamard <- function(dm, eps = 0.001) { #' @rdname hadamard #' @export h4st <- function(obj, levels = c("a", "c", "g", "t")) { - if (!inherits(obj, "phyDat")) stop("obj needs to be of class phyDat!") + if (!inherits(obj, "phyDat")) stop("obj must be of class phyDat") if (attr(obj, "nc") != 4L) stop("Error") obj <- removeAmbiguousSites(obj) obj <- as.data.frame(t(as.character(obj))) diff --git a/R/image_phyDat.R b/R/image_phyDat.R index 0f3961bf..8a686477 100644 --- a/R/image_phyDat.R +++ b/R/image_phyDat.R @@ -3,13 +3,20 @@ #' This function plots an image of an alignment of sequences. #' #' A wrapper for using \code{\link{image.DNAbin}} and \code{\link{image.AAbin}}. +#' Codons triplets are handled as nucleotide sequences. #' @param x an object containing sequences, an object of class \code{phyDat}. #' @param ... further arguments passed to or from other methods. +#' @returns Nothing. The function is called for plotting. #' @seealso \code{\link{image.DNAbin}}, \code{\link{image.AAbin}} +#' @examples +#' data("chloroplast") +#' image(chloroplast[, 1:50], scheme="Clustal", show.aa = TRUE) #' @method image phyDat #' @export image.phyDat <- function(x, ...){ + x <- gap_as_ambiguous(x) + if(attr(x, "type") == "CODON") x <- codon2dna(x) if(attr(x, "type") == "AA") image(as.AAbin(x), ...) if(attr(x, "type") == "DNA") image(as.DNAbin(x), ...) - else return(NULL) + if(attr(x, "type") == "USER") return(NULL) } diff --git a/R/joint_ASR.R b/R/joint_ASR.R new file mode 100644 index 00000000..e279b46d --- /dev/null +++ b/R/joint_ASR.R @@ -0,0 +1,72 @@ +# Joint reconstruction +joint_pml <- function(x){ + stopifnot(inherits(x, "pml")) + if(x$k > 1 || x$inv>0) stop("One one rate allowed so far!") + data <- x$data + eig <- x$eig + contrast <- attr(data, "contrast") + tree <- x$tree + tree <- reorder(tree, "postorder") + edge <- tree$edge + ntip <- Ntip(tree) + desc <- Descendants(tree, type="children") + el <- numeric(max(tree$edge)) + el[tree$edge[,2]] <- tree$edge.length + P <- getP(el * x$rate, eig = eig) + nr <- attr(data, "nr") + nc <- attr(data, "nc") + levels <- attr(data, "levels") + allLevels <- attr(data, "allLevels") + l <- length(tree$edge.length) + L <- C <- array(NA, c(nr, nc, max(tree$edge))) + for(i in seq_len(Ntip(tree))){ + L[,,i]<- log(contrast[data[[i]],,drop=FALSE]%*%P[[i]]) + } + nn <- unique(edge[,1]) + pa <- 0 + root <- edge[l, 1] + lnr <- seq_len(nr) + for(i in seq_len(length(nn))){ + pa <- nn[i] + ch <- desc[[pa]] + P_i <- P[[pa]] + tmp1 <- tmp2 <- matrix(0, nr, nc) + for(j in seq_along(ch)){ + tmp1 <- tmp1 + L[,,ch[j]] + } + if(pa==root) break() + for(j in 1:nc){ + pp <- tmp1 + rep(log(P_i[j,]), each=nr) + pos <- max.col(pp) + L[,j,pa]<- pp[ cbind(lnr, pos)] + C[,j,pa] <- pos + } + } + pp <- tmp1 + rep(log(x$bf), each=nr) + pos <- max.col(pp) + L[,,pa] <- pp + C[,,pa] <- pos + tree <- reorder(tree) + if(is.null(tree$node.label)) tree <- makeNodeLabel(tree) + res <- vector("list", length(tree$node.label)) + names(res) <- tree$node.label + res[[1]] <- pos + att <- attributes(data) + att$names <- tree$node.label + labels <- c(tree$tip.label, tree$node.label) + edge <- tree$edge + nrw <- seq_len(nr) + for(i in seq_along(edge[,1])){ + ch_i <- edge[i,2] + pa_i <- edge[i,1] + if(ch_i > ntip){ + pos <-res[[labels[pa_i]]] + res[[labels[ch_i]]] <- C[cbind(nrw,pos,ch_i)] + } + } + ind <- match(levels, allLevels) + for(i in length(res)) res[[i]] <- ind[res[[i]]] + attributes(res) <- att + res +} + diff --git a/R/linkfun.R b/R/linkfun.R new file mode 100644 index 00000000..2a1c9cff --- /dev/null +++ b/R/linkfun.R @@ -0,0 +1,88 @@ +#' Family objects for evolutionary models +#' +#' Family objects provide a convenient way to specify the details of the models +#' used by functions such as \code{glm}. \code{binomial_mk} extends the +#' \code{binomial} family with the Mk model. The 2 state model is also known +#' Neyman and 4 state model as the Jukes Cantor model. +#' See the documentation for \code{\link{family}} for more details and +#' \code{\link{glm}} for the details on how such model fitting takes place. +#' +#' The link function for the Jukes Cantor 4 state model is +#' \deqn{g(\mu) = -0.75 \cdot log(1-(4/3)\cdot\mu)}{-0.75 * log(1 - (4/3) * mu)} +#' +#' @seealso \code{\link{family}}, \code{\link{binomial}}, \code{\link{glm}} +#' @param k number of states. +#' @importFrom stats binomial dbinom +#' @examples +#' plot(function(x) binomial_mk()$linkinv(x), 0, 1.5, , ylim=c(0, 1), asp=1, +#' main = "inverse link function (JC69)", ylab="f(x)") +#' abline(0, 1) +#' abline(h=.75) +#' plot(function(x) binomial_mk()$linkfun(x), 0, .75, +#' main = "link function (JC69)", ylab="f(x)") +#' +#' data(yeast) +#' dm <- dist.hamming(yeast, FALSE) +#' y <- cbind(dm, 127026 - dm) +#' tree <- nj(dm) +#' X <- designTree(tree) +#' glm(y ~ X -1, binomial_mk()) +#' @noRd +binomial_mk <- function (k=4) +{ + stats <- make_link_mk(k) + variance <- function(mu) mu * (1 - mu) + + tmp <- binomial() + aic <- function(y, n, mu, wt, dev) { + m <- if (any(n > 1)) + n + else wt + -2 * sum(ifelse(m > 0, (wt/m), 0) * dbinom(round(m * + y), round(m), mu, log = TRUE)) + } + # initialize fuer verschiedene Modelle veraendern !!! + initialize <- expression({ + if (NCOL(y) == 1) { + if (is.factor(y)) y <- y != levels(y)[1] + n <- rep.int(1, nobs) + if (any(y < 0 | y > 1)) stop("y values must be 0 <= y <= 1") + mustart <- (weights * y + 0.5)/(2*(weights + 1)) + m <- weights * y + if (any(abs(m - round(m)) > 0.001)) warning("non-integer #successes in a binomial glm!") + } else if (NCOL(y) == 2) { + if (any(abs(y - round(y)) > 0.001)) warning("non-integer counts in a binomial glm!") + n <- y[, 1] + y[, 2] + y <- ifelse(n == 0, 0, y[, 1]/n) + weights <- weights * n + mustart <- (n * y + 0.5)/(2*(n + 1)) + } else stop("for the binomial family, y must be a vector of 0 and 1's\n", + "or a 2 column matrix where col 1 is no. successes and col 2 is no. failures") + }) + structure(list(family = "binomial", link = "Mk", linkfun = stats$linkfun, + linkinv = stats$linkinv, variance = variance, dev.resids = tmp$dev.resids, + aic = aic, mu.eta = stats$mu.eta, initialize = tmp$initialize, + validmu = stats$validmu, valideta = stats$valideta, simulate = tmp$simfun, + dispersion = 1), class = "family") +} + + +make_link_mk <- function (n=4L){ + f <- substitute(ifelse(mu>0, -(k-1)/k * log(1-(k/(k-1))*mu),0), list(k=n)) + linkfun <- as.function(list(f)) + formals(linkfun) <- alist(mu=) + f <- substitute(ifelse(eta>0, (k-1)/k*(1-exp(-(k/(k-1))*eta)),0), list(k=n)) + linkinv <- as.function(list(f)) + formals(linkinv) <- alist(eta=) + f <- substitute(exp(-(k/(k-1))*eta), list(k=n)) + mu.eta <- as.function(list(f)) + formals(mu.eta) <- alist(eta=) + # validmu <- function(mu) all(mu > 0) && all(mu < .5) + f <- substitute(all(mu > 0) && all(mu < (k-1)/k), list(k=n)) + validmu <- as.function(list(f)) + formals(validmu) <- alist(mu=) + valideta <- function(eta) all(eta > 0) + list(linkfun = linkfun, linkinv = linkinv, mu.eta = mu.eta, + valideta = valideta) +} + diff --git a/R/ltg2amb.R b/R/ltg2amb.R new file mode 100644 index 00000000..09c5aa1a --- /dev/null +++ b/R/ltg2amb.R @@ -0,0 +1,45 @@ +#' Replace leading and trailing alignment gaps with an ambiguous state +#' +#' Substitutes leading and trailing alignment gaps in aligned sequences into N +#' (i.e., A, C, G, or T) or ?. The gaps in the middle of the sequences are left +#' unchanged. +#' @param x an object of class \code{phyDat}. +#' @param amb character of the ambiguous state t replace the gaps. +#' @param gap gap parameter to replace. +#' @returns returns an object of class \code{phyDat}. +#' @seealso \code{\link{latag2n}}, \code{\link{ancestral.pml}}, +#' \code{\link{gap_as_state}} +#' @examples +#' x <- phyDat(matrix(c("-", "A", "G", "-", "T", "C"), 2, 3)) +#' y <- ltg2amb(x) +#' image(x) +#' image(y) +#' @keywords cluster +#' @export +ltg2amb <- function(x, amb=ifelse(attr(x,"type")=="DNA", "N", "?"), gap="-"){ + X <- as.character(x) + d <- dim(X) + for(i in seq_len(d[1])){ + # from start + j <- 1 + while(X[i,j]==gap){ + X[i,j] <- amb + j <- j+1 + if(j>d[2])break() + } + j <- d[2] + while(X[i,j]==gap){ + X[i,j] <- amb + j = j-1 + if(j<1)break() + } + } + type <- attr(x, "type") + if(type %in% c("DNA", "AA", "CODON")){ + y <- phyDat(X, type=type) + } else y <- phyDat(X, type="USER", levels = attr(x, "levels")) + y +} +# same as: +# dna2 <- as.DNAbin(dna) |> latag2n() |> as.phyDat() +# but works for AA diff --git a/R/maxCladeCred.R b/R/maxCladeCred.R index d792b3b9..42a3a0c4 100644 --- a/R/maxCladeCred.R +++ b/R/maxCladeCred.R @@ -2,15 +2,18 @@ #' #' \code{maxCladeCred} computes the maximum clade credibility tree from a #' sample of trees. -#' #' So far just the best tree is returned. No annotations or transformations of -#' edge length are performed. +#' edge length are performed and the edge length are taken from the tree. #' #' If a list of partition is provided then the clade credibility is computed #' for the trees in x. #' #' \code{allCompat} returns a 50\% majority rule consensus tree with added -#' compatible splits similar to the option allcompat in MrBayes. +#' compatible splits similar to the option allcompat in MrBayes. This tree has +#' no edge length. +#' +#' \code{\link{add_edge_length}} can be used to add edge lengths computed from +#' the sample of trees. #' #' @param x \code{x} is an object of class \code{multiPhylo} or \code{phylo} #' @param tree logical indicating whether return the tree with the clade @@ -23,7 +26,8 @@ #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{consensus}}, \code{\link{consensusNet}}, #' \code{\link{prop.part}}, \code{\link{bootstrap.pml}}, \code{\link{plotBS}}, -#' \code{\link{transferBootstrap}} +#' \code{\link{transferBootstrap}}, \code{\link{add_edge_length}}, +#' \code{\link{add_boxplot}} #' @keywords cluster #' @importFrom fastmatch fmatch #' @examples @@ -45,6 +49,14 @@ #' plot(majority_consensus, main="Majority consensus tree") #' plot(all_compat, main="Majority consensus tree with compatible splits") #' plot(max_clade_cred, main="Maximum clade credibility tree") +#' +#' par(mfrow = c(2,1)) +#' plot(max_clade_cred, main="Edge length from tree") +#' add_boxplot(max_clade_cred, bs) +#' max_clade_cred_2 <- add_edge_length(max_clade_cred, bs) +#' plot(max_clade_cred_2, main="Edge length computed from sample") +#' add_boxplot(max_clade_cred_2, bs) +#' #' par(old.par) #' #' # compute clade credibility for trees given a prop.part object @@ -109,7 +121,7 @@ mcc <- maxCladeCred #' @export allCompat <- function(x, rooted=FALSE) { x <- .compressTipLabel(x) - if(!rooted) x <- unroot(x) + if(!rooted) x <- unroot(x, collapse.singles=TRUE) l <- length(x) nt <- Ntip(x[[1]]) pp <- prop.part(x) diff --git a/R/modelTest.R b/R/modelTest.R index f94251cb..603eb252 100644 --- a/R/modelTest.R +++ b/R/modelTest.R @@ -209,7 +209,6 @@ modelTest <- function(object, tree = NULL, model = NULL, G = TRUE, I = TRUE, m <- m + 1 } if (FREQ & G & I) { -# if (trace > 0) print(paste0(model, "+G+I+F")) fitGIF <- update(fitIF, k = k) fitGIF <- optim.pml(fitGIF, model = model, optBf = TRUE, optInv = TRUE, optGamma = TRUE, control = control) diff --git a/R/networx.R b/R/networx.R index befcab4f..f2784354 100644 --- a/R/networx.R +++ b/R/networx.R @@ -43,9 +43,9 @@ allCircularSplits <- function(k, labels = NULL) { } -degree <- function(x){ - tabulate(x$edge) -} +#degree <- function(x){ +# tabulate(x$edge) +#} splits2design <- function(obj, weight = NULL, x=TRUE) { @@ -85,7 +85,7 @@ addEdge <- function(network, desc, spl) { fromTo <- intersect(attr(desc, "cycle"), split[[1]]) fromTo <- parent[match(fromTo, child)] - g <- graph(t(edge), directed = FALSE) + g <- make_graph(t(edge), directed = FALSE) ind <- NULL for (i in 2:length(fromTo)) { d <- all_shortest_paths(g, fromTo[i - 1], fromTo[i])$res @@ -156,7 +156,7 @@ circNetwork <- function(x, ord = NULL) { X <- as.matrix(x)[, ord] Y <- X rsY <- rowSums(Y) - X <- X[ind, ] + X <- X[ind, , drop=FALSE] for (k in seq_along(ind)) { Vstart <- ord[1] @@ -177,7 +177,7 @@ circNetwork <- function(x, ord = NULL) { fromTo <- ordStart:ordStop if (ordStart > ordStop) fromTo <- c(ordStart:nTips, 1:ordStop) fromTo <- ord[fromTo] - g <- graph(t(res$edge), directed = FALSE) + g <- make_graph(t(res$edge), directed = FALSE) isChild <- (rsY == (Y %*% X[k, ]))[index] sp2 <- NULL @@ -266,6 +266,7 @@ circNetwork <- function(x, ord = NULL) { #' splits (may excludes splits). #' @param coord add coordinates of the nodes, allows to reproduce the plot. #' @param \dots Further arguments passed to or from other methods. +#' @returns an object of class \code{networx}. #' @note The internal representation is likely to change. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{consensusNet}}, \code{\link{neighborNet}}, @@ -277,7 +278,7 @@ circNetwork <- function(x, ord = NULL) { #' Intertwining phylogenetic trees and networks. \emph{Methods Ecol Evol}. #' \bold{8}, 1212--1220. doi:10.1111/2041-210X.12760 #' @keywords plot -#' @importFrom igraph graph +#' @importFrom igraph make_graph #' @examples #' #' set.seed(1) @@ -414,7 +415,7 @@ as.networx.phylo <- function(x, ...) { # as.igraph.networx <- function(x, directed=FALSE){ -# graph(t(x$edge), directed=directed) +# make_graph(t(x$edge), directed=directed) # } @@ -424,7 +425,7 @@ reorder.networx <- function(x, order = "cladewise", index.only = FALSE, ...) { if (!is.null(attr(x, "order"))) if (attr(x, "order") == order) return(x) - g <- graph(t(x$edge)) + g <- make_graph(t(x$edge)) if (order == "cladewise") neword <- topo_sort(g, "out") else neword <- topo_sort(g, "in") neworder <- order(match(x$edge[, 1], neword)) @@ -438,534 +439,3 @@ reorder.networx <- function(x, order = "cladewise", index.only = FALSE, ...) { attr(x, "order") <- order x } - - -# some trigonemetric functions -rad2deg <- function(rad) (rad * 180) / (pi) -deg2rad <- function(deg) (deg * pi) / (180) - -# circular mean -# https://en.wikipedia.org/wiki/Mean_of_circular_quantities -circ.mean <- function(deg) { - rad.m <- (deg * pi) / (180) - mean.cos <- mean(cos(rad.m)) - mean.sin <- mean(sin(rad.m)) - - theta <- rad2deg(atan(mean.sin / mean.cos)) - if (mean.cos < 0) theta <- theta + 180 - if ((mean.sin < 0) & (mean.cos > 0)) theta <- theta + 360 - theta -} - - -spl2angle <- function(x) { - l <- length(attr(x, "labels")) - ord <- 1:l - if (!is.null(attr(x, "cycle"))) ord <- attr(x, "cycle") - x <- changeOrder(x, attr(x, "labels")[ord]) - y <- lapply(x, function(x, l) (x - 1) / l * 360, l = l) - angle <- vapply(y, circ.mean, 0) |> deg2rad() - # angle <- ((vapply(x, sum, 0) / lengths(x) - 1) / l ) * 2*pi - # kreis2kart(attr(x, "weight"), angle) - angle -} - - -coords.equal.angle <- function(obj) { - if (is.null(attr(obj, "order")) || (attr(obj, "order") == "postorder")) - obj <- reorder.networx(obj) - spl <- obj$splits - spl <- SHORTwise(spl) #, length(obj$tip.label)) - l <- length(obj$edge.length) -# ind1 <- which(!duplicated(obj$splitIndex)) - n <- max(obj$edge) - angle <- spl2angle(spl) - weight <- attr(spl, "weight") - k <- matrix(0, max(obj$splitIndex), 2) - - res <- matrix(0, max(obj$edge), 2) - for (i in 1:l) { # unique(obj$splitIndex) - j <- obj$edge[i, 1] - m <- obj$edge[i, 2] - p <- obj$splitIndex[i] - res[m, ] <- res[j, ] + kreis2kart(weight[p], angle[p]) - } - res -} - - -#' @rdname phangorn-internal -#' @export -coords <- function(obj, dim = "3D") { - # if(is.null(attr(obj,"order")) || (attr(obj, "order")=="postorder") ) - # obj = reorder.networx(obj) - - if (dim == "equal_angle") return(coords.equal.angle(obj)) - - l <- length(obj$edge.length) - ind1 <- which(!duplicated(obj$splitIndex)) - - n <- max(obj$edge) - adj <- spMatrix(n, n, i = obj$edge[, 2], j = obj$edge[, 1], - x = rep(1, length(obj$edge.length))) - g <- graph_from_adjacency_matrix(adj, "undirected") - ########## - # add this - # g2 <- graph(t(obj$edge), directed=FALSE) - # g2 <- set.edge.attribute(g, "weight", value=rep(1, nrow(obj$edge)) - if (dim == "3D") { - coord <- layout_nicely(g, dim = 3) - k <- matrix(0, max(obj$splitIndex), 3) - for (i in ind1) { - tmp <- coord[obj$edge[i, 2], ] - coord[obj$edge[i, 1], ] - k[obj$splitIndex[i], ] <- kart2kugel(tmp[1], tmp[2], tmp[3]) - } - k[obj$splitIndex[ind1], 1] <- obj$edge.length[ind1] - - res <- matrix(0, vcount(g), 3) - for (i in 1:l) { - j <- obj$edge[i, 1] - m <- obj$edge[i, 2] - p <- obj$splitIndex[i] - res[m, ] <- res[j, ] + kugel2kart(k[p, 1], k[p, 2], k[p, 3]) - } - } - else { - coord <- layout_nicely(g, dim = 2) - k <- matrix(0, max(obj$splitIndex), 2) - for (i in ind1) { - tmp <- coord[obj$edge[i, 2], ] - coord[obj$edge[i, 1], ] - k[obj$splitIndex[i], ] <- kart2kreis(tmp[1], tmp[2]) - } - k[obj$splitIndex[ind1], 1] <- obj$edge.length[ind1] - res <- matrix(0, vcount(g), 2) - for (i in 1:l) { - j <- obj$edge[i, 1] - m <- obj$edge[i, 2] - p <- obj$splitIndex[i] - res[m, ] <- res[j, ] + kreis2kart(k[p, 1], k[p, 2]) - } - } - res -} - - -kart2kugel <- function(x, y, z) { - r <- sqrt(x * x + y * y + z * z) - alpha <- atan(sqrt(x * x + y * y) / z) - if (z < 0) alpha <- alpha + pi - beta <- atan(y / x) - if (x < 0) beta <- beta + pi - c(r, alpha, beta) -} - - -kart2kreis <- function(x, y) { - r <- sqrt(x * x + y * y) - alpha <- atan(y / x) - if (x < 0) alpha <- alpha + pi - c(r, alpha) -} - - -kreis2kart <- function(r, alpha) { - c(r * cos(alpha), r * sin(alpha)) - # if(length(r)>1) return(matrix(c(r*cos(alpha), r*sin(alpha)), ncol=2)) - # else return(c(r*cos(alpha), r*sin(alpha))) -} - - -kugel2kart <- function(r, alpha, beta) { - x <- r * sin(alpha) * cos(beta) - y <- r * sin(alpha) * sin(beta) - z <- r * cos(alpha) - c(x, y, z) -} - - -edgeLabels <- function(xx, yy, zz = NULL, edge) { - XX <- (xx[edge[, 1]] + xx[edge[, 2]]) / 2 - YY <- (yy[edge[, 1]] + yy[edge[, 2]]) / 2 - if (!is.null(zz)) { - ZZ <- (zz[edge[, 1]] + zz[edge[, 2]]) / 2 - return(cbind(XX, YY, ZZ)) - } - cbind(XX, YY) -} - - -#' plot phylogenetic networks -#' -#' So far not all parameters behave the same on the the \code{rgl} \code{"3D"} -#' and basic graphic \code{"2D"} device. -#' -#' Often it is easier and safer to supply vectors of graphical parameters for -#' splits (e.g. splits.color) than for edges. These overwrite values edge.color. -#' -#' @param x an object of class \code{"networx"} -#' @param type "3D" to plot using rgl or "equal angle" and "2D" in the normal -#' device. -#' @param use.edge.length a logical indicating whether to use the edge weights -#' of the network to draw the branches (the default) or not. -#' @param show.tip.label a logical indicating whether to show the tip labels on -#' the graph (defaults to \code{TRUE}, i.e. the labels are shown). -#' @param show.edge.label a logical indicating whether to show the tip labels -#' on the graph. -#' @param edge.label an additional vector of edge labels (normally not needed). -#' @param show.node.label a logical indicating whether to show the node labels -#' (see example). -#' @param node.label an additional vector of node labels (normally not needed). -#' @param show.nodes a logical indicating whether to show the nodes (see -#' example). -#' @param tip.color the colors used for the tip labels. -#' @param edge.color the colors used to draw edges. -#' @param edge.width the width used to draw edges. -#' @param edge.lty a vector of line types. -#' @param split.color the colors used to draw edges. -#' @param split.width the width used to draw edges. -#' @param split.lty a vector of line types. -#' @param font an integer specifying the type of font for the labels: 1 (plain -#' text), 2 (bold), 3 (italic, the default), or 4 (bold italic). -#' @param cex a numeric value giving the factor scaling of the labels. -#' @param cex.node.label a numeric value giving the factor scaling of the node -#' labels. -#' @param cex.edge.label a numeric value giving the factor scaling of the edge -#' labels. -#' @param col.node.label the colors used for the node labels. -#' @param col.edge.label the colors used for the edge labels. -#' @param font.node.label the font used for the node labels. -#' @param font.edge.label the font used for the edge labels. -#' @param underscore a logical specifying whether the underscores in tip labels -#' should be written as spaces (the default) or left as are (if TRUE). -#' @param \dots Further arguments passed to or from other methods. -#' @rdname plot.networx -#' @note The internal representation is likely to change. -#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{consensusNet}}, \code{\link{neighborNet}}, -#' \code{\link{splitsNetwork}}, \code{\link{hadamard}}, -#' \code{\link{distanceHadamard}}, \code{\link{as.networx}}, -#' \code{\link[ape]{evonet}}, \code{\link[ape]{as.phylo}}, -#' \code{\link{densiTree}}, \code{\link[ape]{nodelabels}} -#' @references Dress, A.W.M. and Huson, D.H. (2004) Constructing Splits Graphs -#' \emph{IEEE/ACM Transactions on Computational Biology and Bioinformatics -#' (TCBB)}, \bold{1(3)}, 109--115 -#' -#' Schliep, K., Potts, A. J., Morrison, D. A. and Grimm, G. W. (2017), -#' Intertwining phylogenetic trees and networks. \emph{Methods Ecol Evol}. -#' \bold{8}, 1212--1220. doi:10.1111/2041-210X.12760 -#' @keywords plot -#' @importFrom igraph graph -#' @examples -#' -#' set.seed(1) -#' tree1 <- rtree(20, rooted=FALSE) -#' sp <- as.splits(rNNI(tree1, n=10)) -#' net <- as.networx(sp) -#' plot(net) -#' \dontrun{ -#' # also see example in consensusNet -#' example(consensusNet) -#' } -#' @importFrom igraph graph_from_adjacency_matrix vcount topo_sort layout_nicely -#' @method plot networx -#' @export -plot.networx <- function(x, type = "equal angle", use.edge.length = TRUE, - show.tip.label = TRUE, show.edge.label = FALSE, - edge.label = NULL, show.node.label = FALSE, - node.label = NULL, show.nodes = FALSE, - tip.color = "black", edge.color = "black", - edge.width = 3, edge.lty = 1, split.color = NULL, - split.width = NULL, split.lty = NULL, font = 3, - cex = par("cex"), cex.node.label = cex, - cex.edge.label = cex, col.node.label = tip.color, - col.edge.label = tip.color, font.node.label = font, - font.edge.label = font, underscore = FALSE, ...) { - type <- match.arg(type, c("equal angle", "3D", "2D")) - if (use.edge.length == FALSE){ - x$edge.length[] <- 1 - attr(x$splits, "weight") <- rep(1, length(x$splits)) - } - nTips <- length(x$tip.label) - conf <- attr(x$splits, "confidences") - index <- x$splitIndex - if (is.null(edge.label) & !is.null(conf)) { - conf <- conf[index] - if (!is.null(x$translate)) conf[match(x$translate$node, x$edge[, 2])] <- "" - else conf[x$edge[, 2] <= nTips] <- "" - edge.label <- conf - } - if (is.null(node.label)) node.label <- as.character(1:max(x$edge)) - if (show.tip.label) node.label[1:nTips] <- "" - if (show.tip.label){ - if (is.expression(x$tip.label)) underscore <- TRUE - if (!underscore) x$tip.label <- gsub("_", " ", x$tip.label) - } - - lspl <- max(x$splitIndex) - if (!is.null(split.color)) { - if (length(split.color) != lspl) - stop("split.color must be same length as splits") - else edge.color <- split.color[x$splitIndex] - } - if (!is.null(split.width)) { - if (length(split.width) != lspl) - stop("split.color must be same length as splits") - else edge.width <- split.width[x$splitIndex] - } - if (!is.null(split.lty)) { - if (length(split.lty) != lspl) - stop("split.color must be same length as splits") - else edge.lty <- split.lty[x$splitIndex] - } - - chk <- FALSE - - if (type == "3D") chk <- requireNamespace("rgl", quietly = TRUE) - if (!chk && type == "3D") { - warning("type='3D' requires the package 'rgl', plotting in '2D' instead!\n") - type <- "2D" - } - # use precomputed vertices when available - coord <- NULL - if (!is.null(x$.plot)) coord <- x$.plot$vertices - - if (type == "3D") { - if (is.null(coord) || ncol(coord) != 3) - coord <- coords(x, dim = "3D") - plotRGL(coord, x, show.tip.label = show.tip.label, - show.edge.label = show.edge.label, edge.label = edge.label, - show.node.label = show.node.label, node.label = node.label, - show.nodes = show.nodes, tip.color = tip.color, edge.color = edge.color, - edge.width = edge.width, font = font, cex = cex, - cex.node.label = cex.node.label, cex.edge.label = cex.edge.label, - col.node.label = col.node.label, col.edge.label = col.edge.label, - font.node.label = font.node.label, font.edge.label = font.edge.label) - } - else { - if (is.null(coord) || ncol(coord) != 2) { - if (type == "equal angle") coord <- coords.equal.angle(x) - else coord <- coords(x, dim = "2D") - } - plot2D(coord, x, show.tip.label = show.tip.label, - show.edge.label = show.edge.label, edge.label = edge.label, - show.node.label = show.node.label, node.label = node.label, - show.nodes = show.nodes, tip.color = tip.color, edge.color = edge.color, - edge.width = edge.width, edge.lty = edge.lty, font = font, cex = cex, - cex.node.label = cex.node.label, cex.edge.label = cex.edge.label, - col.node.label = col.node.label, col.edge.label = col.edge.label, - font.node.label = font.node.label, font.edge.label = font.edge.label, - add = FALSE) - } - x$.plot <- list(vertices = coord, edge.color = edge.color, - edge.width = edge.width, edge.lty = edge.lty) - L <- list(Ntip = nTips, type = "networx") - assign("last_plot.phylo", c(L, list(edge = x$edge, xx = coord[, 1], - yy = coord[, 2])), envir = .PlotPhyloEnv) - invisible(x) -} - - -plotRGL <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, - edge.label = NULL, show.node.label = FALSE, - node.label = NULL, show.nodes = FALSE, tip.color = "blue", - edge.color = "grey", edge.width = 3, font = 3, - cex = par("cex"), cex.node.label = cex, - cex.edge.label = cex, col.node.label = tip.color, - col.edge.label = tip.color, font.node.label = font, - font.edge.label = font, ...) { - open3d <- rgl::open3d - segments3d <- rgl::segments3d - spheres3d <- rgl::spheres3d - texts3d <- rgl::texts3d - - edge <- net$edge - - x <- coords[, 1] - y <- coords[, 2] - z <- coords[, 3] - - nTips <- length(net$tip.label) - - segments3d(x[t(edge)], y[t(edge)], z[t(edge)], - col = rep(edge.color, each = 2), lwd = edge.width) - radius <- 0 - if (show.nodes) { - radius <- sqrt( (max(x) - min(x))^2 + (max(y) - min(y))^2 + - (max(z) - min(z))^2) / 200 - spheres3d(x[1:nTips], y[1:nTips], z[1:nTips], radius = 2 * radius, - color = "cyan") - spheres3d(x[-c(1:nTips)], y[-c(1:nTips)], z[-c(1:nTips)], radius = radius, - color = "magenta") - } - if (show.tip.label) { - if (is.null(net$translate)) - texts3d(x[1:nTips] + 2.05 * radius, y[1:nTips], z[1:nTips], - net$tip.label, color = tip.color, cex = cex, font = font) - else - texts3d(x[net$translate$node] + 2.05 * radius, y[net$translate$node], - z[net$translate$node], net$tip.label, color = tip.color, cex = cex, - font = font) - } - if (show.edge.label) { - ec <- edgeLabels(x, y, z, edge) - if (is.null(edge.label)) edge.label <- net$splitIndex - # else edge.label = net$splitIndex - texts3d(ec[, 1], ec[, 2], ec[, 3], edge.label, color = col.edge.label, - cex = cex.edge.label, font = font.edge.label) - } - if (show.node.label) { - texts3d(x, y, z, node.label, color = col.node.label, cex = cex.node.label, - font = font.node.label) - } -} - - -plot2D <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, - edge.label = NULL, show.node.label = FALSE, - node.label = NULL, tip.color = "blue", edge.color = "grey", - edge.width = 3, edge.lty = 1, font = 3, cex = par("cex"), - cex.node.label = cex, cex.edge.label = cex, - col.node.label = tip.color, col.edge.label = tip.color, - font.node.label = font, font.edge.label = font, - add = FALSE, ...) { - edge <- net$edge - label <- net$tip.label - xx <- coords[, 1] - yy <- coords[, 2] - nTips <- length(label) - - xlim <- range(xx) - ylim <- range(yy) - - if (show.tip.label) { - offset <- max(nchar(label)) * 0.018 * cex * diff(xlim) - xlim <- c(xlim[1] - offset, xlim[2] + offset) - ylim <- c(ylim[1] - 0.03 * cex * diff(ylim), ylim[2] + - 0.03 * cex * diff(ylim)) - } - if (!add) { - plot.new() - plot.window(xlim, ylim, asp = 1) - } - cladogram.plot(edge, xx, yy, edge.color, edge.width, edge.lty) - if (show.tip.label) { - if (is.null(net$translate)) ind <- match(1:nTips, edge[, 2]) - else ind <- match(net$translate$node, edge[, 2]) - pos <- rep(4, nTips) - XX <- xx[edge[ind, 1]] - xx[edge[ind, 2]] - pos[XX > 0] <- 2 - YY <- yy[edge[ind, 1]] - yy[edge[ind, 2]] - pos2 <- rep(3, nTips) - pos2[YY > 0] <- 1 - # needed if tiplabels are not at internal nodes - XX[is.na(XX)] <- 0 - YY[is.na(YY)] <- 0 - pos[abs(YY) > abs(XX)] <- pos2[abs(YY) > abs(XX)] - if (is.null(net$translate)) text(xx[1:nTips], yy[1:nTips], labels = label, - pos = pos, col = tip.color, cex = cex, font = font) - else text(xx[net$translate$node], yy[net$translate$node], labels = label, - pos = pos, col = tip.color, cex = cex, font = font) - } - if (show.edge.label) { - ec <- edgeLabels(xx, yy, edge = edge) - if (is.null(edge.label)) edge.label <- net$splitIndex - - # show only one edge label - em <- apply(ec, 1, function(x) max(abs(x))) - si <- net$splitIndex - for (i in unique(si)) { - tmp <- si == i - if (sum(tmp) > 1) { - w <- which(tmp) - wm <- which.max(em[w]) - edge.label[w[-wm]] <- "" - } - } - - text(ec[, 1], ec[, 2], labels = edge.label, col = col.edge.label, - cex = cex.edge.label, font = font.edge.label) - } - if (show.node.label) { - text(xx, yy, labels = node.label, col = col.node.label, - cex = cex.node.label, font = font.node.label) - } -} - - -closest.edge <- function(x, y, P1, P2) { - x1 <- P1[, 1] - x2 <- P2[, 1] - y1 <- P1[, 2] - y2 <- P2[, 2] - - A <- sqrt( (x2 - x)^2 + (y2 - y)^2) # d_BC - B <- sqrt( (x1 - x)^2 + (y1 - y)^2) # d_AC - C <- sqrt( (x1 - x2)^2 + (y1 - y2)^2) # d_AB - # Kosinussatz - alpha <- acos( (B^2 + C^2 - A^2) / (2 * B * C)) - beta <- acos( (A^2 + C^2 - B^2) / (2 * A * C)) - - d <- abs( (y2 - y1) * x - (x2 - x1) * y + x2 * y1 - y2 * x1) / - sqrt( (y2 - y1)^2 + (x2 - x1)^2) - d[alpha > (pi / 2)] <- B[alpha > (pi / 2)] - d[beta > (pi / 2)] <- A[beta > (pi / 2)] - d -} - -closest.node <- function(x, y, P) { - x1 <- P[, 1] - y1 <- P[, 2] - d <- sqrt((x1 - x)^2 + (y1 - y)^2) - d -} - - -#' Identify splits in a network -#' -#' \code{identify.networx} reads the position of the graphics pointer when the -#' mouse button is pressed. It then returns the split belonging to the edge -#' closest to the pointer. The network must be plotted beforehand. -#' -#' @param x an object of class \code{networx} -#' @param quiet a logical controlling whether to print a message inviting the -#' user to click on the tree. -#' @param \dots further arguments to be passed to or from other methods. -#' @return \code{identify.networx} returns a splits object. -#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link[phangorn]{plot.networx}}, -#' \code{\link[graphics]{identify}} -#' @examples -#' \dontrun{ -#' data(yeast) -#' dm <- dist.ml(yeast) -#' nnet <- neighborNet(dm) -#' plot(nnet) -#' identify(nnet) # click close to an edge -#' } -#' @importFrom graphics identify -#' @method identify networx -#' @export -identify.networx <- function(x, quiet = FALSE, ...) { - if (!quiet) - cat("Click close to a node or edge of the tree...\n") - xy <- locator(1) - if (is.null(xy)) - return(NULL) - if (is.null(x$.plot)) { - lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) - edge <- lastPP$edge - xx <- lastPP$xx - yy <- lastPP$yy - vertices <- cbind(xx, yy) - } - else { - lastPP <- x$.plot - edge <- x$edge - vertices <- lastPP$vertices - } - P1 <- vertices[edge[, 1], , drop = FALSE] - P2 <- vertices[edge[, 2], , drop = FALSE] - d <- closest.edge(xy$x, xy$y, P1, P2) - split <- x$splitIndex[which.min(d)] - x$splits[split] -} diff --git a/R/parsimony.R b/R/parsimony.R index 1882bf1a..4df93611 100644 --- a/R/parsimony.R +++ b/R/parsimony.R @@ -1,13 +1,17 @@ #' Parsimony tree. #' +#' \code{pratchet} implements the parsimony ratchet (Nixon, 1999) and is the +#' preferred way to search for the best parsimony tree. For small number of taxa +#' the function \code{\link{bab}} can be used to compute all most parsimonious +#' trees. #' #' \code{parsimony} returns the parsimony score of a tree using either the -#' sankoff or the fitch algorithm. \code{optim.parsimony} tries to find the -#' maximum parsimony tree using either Nearest Neighbor Interchange (NNI) -#' rearrangements or sub tree pruning and regrafting (SPR). \code{pratchet} -#' implements the parsimony ratchet (Nixon, 1999) and is the preferred way to -#' search for the best tree. \code{random.addition} can be used to produce -#' starting trees. +#' sankoff or the fitch algorithm. +#' \code{optim.parsimony} optimizes the topology using either Nearest Neighbor +#' Interchange (NNI) rearrangements or sub tree pruning and regrafting (SPR) and +#' is used inside \code{pratchet}. \code{random.addition} can be used to produce +#' starting trees and is an option for the argument perturbation in +#' \code{pratchet}. #' #' The "SPR" rearrangements are so far only available for the "fitch" method, #' "sankoff" only uses "NNI". The "fitch" algorithm only works correct for @@ -106,7 +110,7 @@ compressSites <- function(data) { ntaxa <- nrow(data) res <- vector("list", ntaxa) for(i in seq_len(ntaxa)) res[[i]] <- data[i, pos] - attrData$weight <- tapply(attrData$weight, index, sum) + attrData$weight <- as.vector(tapply(attrData$weight, index, sum)) attrData$index <- NULL attrData$nr <- length(attrData$weight) attrData$compressed <- TRUE @@ -189,21 +193,39 @@ upperBound <- function(x, cost = NULL) { #' \deqn{RI = \frac{MaxChanges - ObsChanges}{MaxChanges - MinChanges}}{RI = (MaxChanges - ObsChanges) / (MaxChanges - MinChanges)} #' #' @param data A object of class phyDat containing sequences. -#' @param tree tree to start the nni search from. +#' @param tree a phylogenetic tree, i.e. an object of class \code{phylo}. #' @param cost A cost matrix for the transitions between two states. #' @param sitewise return CI/RI for alignment or sitewise -#' +#' @returns a scalar or vector with the CI/RI vector. #' @seealso \code{\link{parsimony}}, \code{\link{pratchet}}, #' \code{\link{fitch}}, \code{\link{sankoff}}, \code{\link{bab}}, #' \code{\link{ancestral.pars}} +#' @examples +#' example(as.phylo.formula) +#' lab <- tr$tip.label +#' lab[79] <- "Herpestes fuscus" +#' tr$tip.label <- abbreviateGenus(lab) +#' X <- matrix(0, 112, 3, dimnames = list(tr$tip.label, c("Canis", "Panthera", +#' "Canis_Panthera"))) +#' desc_canis <- Descendants(tr, "Canis")[[1]] +#' desc_panthera <- Descendants(tr, "Panthera")[[1]] +#' X[desc_canis, c(1,3)] <- 1 +#' X[desc_panthera, c(2,3)] <- 1 +#' col <- rep("black", 112) +#' col[desc_panthera] <- "red" +#' col[desc_canis] <- "blue" +#' X <- phyDat(X, "USER", levels=c(0,1)) +#' plot(tr, "f", tip.color=col) +#' # The first two sites are homoplase free! +#' CI(tr, X, sitewise=TRUE) +#' RI(tr, X, sitewise=TRUE) #' #' @rdname CI #' @export CI <- function(tree, data, cost = NULL, sitewise = FALSE) { - if (sitewise) pscore <- sankoff(tree, data, cost = cost, site = "site") - else pscore <- sankoff(tree, data, cost = cost) - weight <- attr(data, "weight") data <- subset(data, tree$tip.label) + pscore <- sankoff(tree, data, cost, ifelse(sitewise, "site", "pscore")) + weight <- attr(data, "weight") m <- lowerBound(data, cost = cost) if (sitewise) { return( (m / pscore)[attr(data, "index")]) @@ -215,9 +237,8 @@ CI <- function(tree, data, cost = NULL, sitewise = FALSE) { #' @rdname CI #' @export RI <- function(tree, data, cost = NULL, sitewise = FALSE) { - if (sitewise) pscore <- sankoff(tree, data, cost = cost, site = "site") - else pscore <- sankoff(tree, data, cost = cost) data <- subset(data, tree$tip.label) + pscore <- sankoff(tree, data, cost, ifelse(sitewise, "site", "pscore")) weight <- attr(data, "weight") m <- lowerBound(data, cost = cost) g <- upperBound(data, cost = cost) @@ -309,6 +330,7 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, rearrangements = "SPR", perturbation = "ratchet", ...) { if(inherits(data, "DNAbin") || inherits(data, "AAbin")) data <- as.phyDat(data) + printevery <- 10L eps <- 1e-08 trace <- trace - 1 ref <- names(data) @@ -344,10 +366,9 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, return(tree) } - if (perturbation != "random_addition"){ - if(is.null(start)) start <- optim.parsimony(fastme.ols(dist.hamming(data)), - data, trace = trace-1, method = method, - rearrangements = rearrangements, ...) + if(is.null(start)) start <- optim.parsimony(random.addition(data), + data, trace = trace-1, method = method, + rearrangements = rearrangements, ...) tree <- start label <- intersect(tree$tip.label, names(data)) if (!is.binary(tree)){ @@ -359,8 +380,7 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, attr(tree, "pscore") <- parsimony(tree, data, method = method, ...) mp <- attr(tree, "pscore") if (trace >= 0) - print(paste("Best pscore so far:", attr(tree, "pscore"))) - } + cat("Parsimony score of initial tree:", attr(tree, "pscore"), "\n") FUN <- function(data, tree, method, rearrangements, ...) optim.parsimony(tree, data = data, method = method, rearrangements = rearrangements, ...) @@ -369,12 +389,10 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, result <- addTaxa(result, attr(data, "duplicated")) } result <- relabel(result, ref) +# if (trace > 1) cat("optimize topology (NNI): ", pscore, "-->", psc, "\n") hr <- hash(result) on.exit({ if (!all && inherits(result, "multiPhylo")) result <- result[[1]] -# if(!is.null(attr(data, "duplicated"))) -# result <- addTaxa(result, attr(data, "duplicated")) - # else class(result) <- "multiPhylo" if (length(result) == 1) result <- result[[1]] env <- new.env() start_trees <- start_trees[seq_len(i)] @@ -403,22 +421,18 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, bs_ind <- which(bsw > 0) bs_data <- getRows(data, bs_ind) attr(bs_data, "weight") <- bsw[bs_ind] - if(length(bs_ind) > 0)p_trees <- optim.parsimony(tree, bs_data, + if(length(bs_ind) > 0){ + # p_trees <- random.addition(bs_data) # 3 * ?? + p_trees <- optim.parsimony(tree, bs_data, trace = trace, method = method, rearrangements = rearrangements, ...) + } else p_trees <- stree(length(data), tip.label = names(data)) - trees <- optim.parsimony(p_trees, data, trace = trace, - method = method, rearrangements = rearrangements, ...) - } - if (perturbation == "stochastic") { - p_trees <- rNNI(tree, floor(nTips / 2)) - trees <- optim.parsimony(p_trees, data, trace = trace, method = method, - rearrangements = rearrangements, ...) - } - if (perturbation == "random_addition") { - p_trees <- random.addition(data) - trees <- optim.parsimony(p_trees, data, trace = trace, method = method, - rearrangements = rearrangements, ...) } + if (perturbation == "stochastic") p_trees <- rNNI(tree, floor(nTips / 2)) + if (perturbation == "random_addition") p_trees <- random.addition(data) + trees <- optim.parsimony(p_trees, data, trace = trace, method = method, + rearrangements = rearrangements, ...) + curr_tree <- trees if(!is.null(attr(data, "duplicated"))){ p_trees <- addTaxa(p_trees, attr(data, "duplicated")) trees <- addTaxa(trees, attr(data, "duplicated")) @@ -432,7 +446,7 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, if ( (mp1 + eps) < mp) { kmax <- 1 result <- trees - tree <- trees + tree <- curr_tree hr <- hash(trees) mp <- mp1 } @@ -446,8 +460,9 @@ pratchet <- function(data, start = NULL, method = "fitch", maxit = 1000, } } } - if (trace >= 0) - print(paste("Best pscore so far:", mp)) + if (trace >= 0 && (!i%%printevery)) + cat("\rIteration: ", i, ". Best parsimony score so far: ", mp, sep="") if ( (kmax >= k) && (i >= minit)) break() } # for + if (trace >= 0)cat("\n") } # pratchet diff --git a/R/phyDat.R b/R/phyDat.R index 567cbb9e..64431b65 100644 --- a/R/phyDat.R +++ b/R/phyDat.R @@ -39,7 +39,7 @@ #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{DNAbin}}, \code{\link{as.DNAbin}}, #' \code{\link{baseFreq}}, \code{\link{glance.phyDat}}, \code{\link{dna2codon}}, -#' \code{\link{read.dna}}, \code{\link{read.aa}}, \code{\link{read.nexus.data}} +#' \code{\link{read.dna}}, \code{\link{read.nexus.data}} #' and the chapter 1 in the \code{vignette("AdvancedFeatures", #' package="phangorn")} and the example of \code{\link{pmlMix}} for the use of #' \code{\link{allSitePattern}}. @@ -98,7 +98,7 @@ cbind.phyDat <- function(..., gaps="-", compress=TRUE){ if (n == 1) return(x[[1]]) types <- sapply(x, function(x)attr(x, "type")) -# if(length(unique(types))>1) stop("All alignments need to have the same type!") + if(any(types!=types[1]))stop("Alignments must have same type!") nr <- numeric(n) ATTR <- attributes(x[[1]]) nr[1] <- sum(attr(x[[1]], "weight")) @@ -169,11 +169,11 @@ rbind.phyDat <- function(...){ l <- sapply(x, function(x)sum(attr(x, "weight"))) if(any(l!=l[1]))stop("Alignments have different # of characters!") if(any(types!=types[1]))stop("Alignments must have same type!") - nam <- sapply(x, names) + nam <- lapply(x, names) |> unlist() if(any(duplicated(nam)))stop("Duplicated names!") m <- lengths(x) mcs <- c(0, cumsum(m)) - res <- matrix(NA_character_, sum(m), l[1]) + res <- matrix(NA_character_, sum(m), l[1], dimnames=list(nam, NULL)) for(i in seq_along(x)){ res[(mcs[i]+1):mcs[i+1], ] <- as.character(x[[i]]) } @@ -197,7 +197,7 @@ compress.phyDat <- function(data){ pos <- which(!duplicated(index)) weight <- tapply(attrib$weight, index, sum) names(weight) <- NULL - attrib$weight <- weight + attrib$weight <- as.vector(weight) if(is.null(attrib$index)) attrib$index <-index else attrib$index <- index[attrib$index] for(i in seq_len(length(data))) data[[i]] <- data[[i]][pos] @@ -510,9 +510,9 @@ constSitePattern <- function(n, names=NULL, type="DNA", levels=NULL){ #' #' Felsenstein, J. (1993) Phylip (Phylogeny Inference Package) version 3.5c. #' Department of Genetics, University of Washington. -#' \url{https://evolution.genetics.washington.edu/phylip/phylip.html} +#' \url{https://phylipweb.github.io/phylip/} #' @keywords IO -#' @export read.aa +#' @noRd read.aa <- function (file, format = "interleaved", skip = 0, nlines = 0, comment.char = "#", seq.names = NULL){ getTaxaNames <- function(x) { @@ -540,10 +540,10 @@ read.aa <- function (file, format = "interleaved", skip = 0, nlines = 0, oop <- options(warn = -1) fl.num <- as.numeric(unlist(strsplit(gsub("^ +", "", fl), " +"))) options(oop) - if (all(is.na(fl.num))) + if (all(is.na(fl.num)) || length(fl.num) != 2) stop("the first line of the file must contain the dimensions of the data") - if (length(fl.num) != 2) - stop("the first line of the file must contain TWO numbers") +# if (length(fl.num) != 2) +# stop("the first line of the file must contain the dimensions of the data") else { n <- fl.num[1] s <- fl.num[2] diff --git a/R/phyDat2.R b/R/phyDat2.R index 763fb5b7..615bacca 100644 --- a/R/phyDat2.R +++ b/R/phyDat2.R @@ -172,10 +172,12 @@ phyDat.AA <- function (data, return.index = TRUE){ # AAbin if (inherits(data,"AAbin")){ if(is.list(data)) data <- as.matrix(data) - data <- as.character(data) + data <- as.character(data) # not needed + } + if(inherits(data, "character")){ + data <- as.matrix(data) +# data <- toupper(data) } - if(inherits(data, "character")) data <- as.matrix(data) - data <- toupper(data) aa <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V") aa2 <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", @@ -203,9 +205,23 @@ phyDat.AA <- function (data, return.index = TRUE){ n <- nrow(data) res <- vector("list", n) ind_na <- logical(length(weight)) - for(i in seq_len(n)){ - res[[i]] <- match(data[i, ind], aa2) - ind_na[is.na(res[[i]])] <- TRUE + + if(inherits(data,"AAbin")){ + cs <- rep(NA_integer_, 256) + # + cs[as.integer (sapply( aa2 , charToRaw) )] <- seq_along(aa2) + cs[as.integer (sapply( tolower(aa2) , charToRaw) )] <- seq_along(aa2) + for(i in seq_len(n)){ + res[[i]] <- cs[ as.integer(data[i, ind]) ] + # maybe check for NAs + # ind_na[is.na(res[[i]])] <- TRUE + } + } else { + data <- toupper(data) + for(i in seq_len(n)){ + res[[i]] <- match(data[i, ind], aa2) + ind_na[is.na(res[[i]])] <- TRUE + } } if(any(ind_na)){ warning("Found unknown characters (not supplied in levels). Deleted sites with unknown states.") @@ -239,9 +255,10 @@ phyDat.codon <- function (data, return.index = TRUE, ambiguity = "---", data <- as.matrix(data) data <- tolower(data) } - if (inherits(data,"DNAbin") || inherits(data, "phyDat")) + if (inherits(data,"DNAbin") || inherits(data, "phyDat")){ + if(is.list(data) && inherits(data, "phyDat")) data <- as.matrix(data) data <- as.character(data) - + } data[data=="u"] <- "t" stopcodon <- match.arg(stopcodon, c("exclude", "include")) @@ -257,7 +274,7 @@ phyDat.codon <- function (data, return.index = TRUE, ambiguity = "---", if(stopcodon!="include") codon <- codon[tmp != "*"] # no stop codons if(stopcodon=="exclude"){ - for(i in stopcodon){ + for(i in stop_codons){ data[data==i] <- NA } rm_stop <- which(is.na(data), arr.ind = TRUE)[,1] @@ -324,3 +341,9 @@ phyDat.codon <- function (data, return.index = TRUE, ambiguity = "---", res } + +## AA: add toupper to default +## DNA: add tolower, DNAbin to data +## default: create contrast from levels +## CODON: create contrast +## diff --git a/R/phyDat_conversion.R b/R/phyDat_conversion.R index 08e7f267..de664302 100644 --- a/R/phyDat_conversion.R +++ b/R/phyDat_conversion.R @@ -30,7 +30,7 @@ #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso [DNAbin()], [as.DNAbin()], #' \code{\link{baseFreq}}, \code{\link{glance.phyDat}}, -#' \code{\link{read.dna}}, \code{\link{read.aa}}, \code{\link{read.nexus.data}} +#' \code{\link{read.dna}}, \code{\link{read.nexus.data}} #' and the chapter 1 in the \code{vignette("phangorn-specials", #' package="phangorn")} and the example of \code{\link{pmlMix}} for the use of #' \code{allSitePattern} @@ -87,6 +87,11 @@ as.phyDat.factor <- function(x, ...){ #' @export as.phyDat.DNAbin <- function(x, ...) phyDat.DNA(x, ...) +#' @rdname as.phyDat +#' @method as.phyDat AAbin +#' @export +as.phyDat.AAbin <- function(x, ...) phyDat.AA(x, ...) + #' @rdname as.phyDat #' @method as.phyDat alignment @@ -137,6 +142,22 @@ as.phyDat.MultipleAlignment <- function(x, ...){ } +#' @rdname as.phyDat +#' @method as.phyDat AAStringSet +#' @export +as.phyDat.AAStringSet <- function(x, ...){ + as.AAbin(x, ...) |> as.phyDat() +} + + +#' @rdname as.phyDat +#' @method as.phyDat DNAStringSet +#' @export +as.phyDat.DNAStringSet <- function(x, ...){ + as.DNAbin(x, ...) |> as.phyDat() +} + + # @rdname phyDat #' @export as.MultipleAlignment <- function (x, ...){ diff --git a/R/phylo.R b/R/phylo.R index cbccaff2..9f3fa581 100644 --- a/R/phylo.R +++ b/R/phylo.R @@ -68,9 +68,9 @@ optimCodon <- function(tree, data, Q, subs, syn, trace = 0L, ab = c(0, 0), } -subsChoice <- function(type = .dnamodels) { +subsChoice <- function(type = .dnamodels, has_gap_state=FALSE) { type <- match.arg(type) - switch(type, + res <- switch(type, JC = list(optQ = FALSE, optBf = FALSE, subs = c(0, 0, 0, 0, 0, 0)), F81 = list(optQ = FALSE, optBf = TRUE, subs = c(0, 0, 0, 0, 0, 0)), K80 = list(optQ = TRUE, optBf = FALSE, subs = c(0, 1, 0, 0, 1, 0)), @@ -95,6 +95,14 @@ subsChoice <- function(type = .dnamodels) { SYM = list(optQ = TRUE, optBf = FALSE, subs = c(1, 2, 3, 4, 5, 0)), GTR = list(optQ = TRUE, optBf = TRUE, subs = c(1, 2, 3, 4, 5, 0)) ) + if(has_gap_state){ + tmp <- matrix(0,4,4) + tmp[lower.tri(tmp)] <- res$subs + tmp <- rbind(tmp, max(tmp) + 1) + res$subs <- tmp[lower.tri(tmp)] + res$optQ <- TRUE + } + res } @@ -123,16 +131,20 @@ subsChoice_USER <- function(type = .usermodels, nstates) { optimGamma <- function(tree, data, shape = 1, k = 4, ...) { fn <- function(shape, tree, data, k, ...) pml.fit(tree, data, shape = shape, k = k, ...) - res <- optimize(f = fn, interval = c(0.1, 100), lower = 0.1, upper = 1000, - maximum = TRUE, tol = .01, tree = tree, data = data, k = k, ...) + res <- optimize(f = fn, interval = c(0.1, 100), lower = 0.1, upper = 100, + maximum = TRUE, tol = .001, tree = tree, data = data, k = k, ...) res } -optimInv <- function(tree, data, inv = 0.01, INV = NULL, ll.0 = NULL, ...) { +optimInv <- function(tree, data, inv = 0.01, INV, ...) { + weight <- as.double(attr(data, "weight")) + tmp <- as.vector( INV %*% rep(1, attr(data, "nc")) ) + ind <- which(tmp > 0) + max_inv <- sum(weight[ind]) / sum(weight) fn <- function(inv, tree, data, ...) pml.fit(tree, data, inv = inv, INV = INV, ll.0 = NULL, ...) - res <- optimize(f = fn, interval = c(0, 1), lower = 0, upper = 1, + res <- optimize(f = fn, interval = c(0, max_inv), lower = 0, upper = max_inv, maximum = TRUE, tol = .0001, tree = tree, data = data, ...) res } @@ -576,7 +588,7 @@ optimEdge <- function(tree, data, eig = eig, w = w, g = g, bf = bf, rate = rate, eps <- (old.ll - newll) / newll if (eps < 0) return(list(tree=oldtree, logLik=old.ll)) oldtree <- treeP - if (control$trace > 1) cat(old.ll, " -> ", newll, "\n") +# if (control$trace > 1) cat(old.ll, " -> ", newll, "\n") old.ll <- newll } if (control$trace > 0) @@ -625,9 +637,13 @@ readAArate <- function(file) { # save(.LG,.WAG,.Dayhoff,.JTT,.cpREV,.mtmam,.mtArt, file = "sysdata2.rda") -getModelAA <- function(model, bf = TRUE, Q = TRUE) { +getModelAA <- function(model, bf = TRUE, Q = TRUE, has_gap_state=FALSE) { model <- match.arg(eval(model), .aamodels) tmp <- get(paste(".", model, sep = ""), environment(pml)) + if(has_gap_state){ + tmp$Q <- add_gap_Q_AA(tmp$Q) + tmp$bf <- add_gap_bf_AA(tmp$bf) + } if (Q) assign("Q", tmp$Q, envir = parent.frame()) if (bf) assign("bf", tmp$bf, envir = parent.frame()) } @@ -657,7 +673,7 @@ guess_model <- function(x){ } - +# needs to go in phangorn extra optEdgeMulti <- function(object, control = pml.control(epsilon = 1e-8, maxit = 10, trace = 1, tau = 1e-8), ...) { tree <- object$tree @@ -700,23 +716,23 @@ optEdgeMulti <- function(object, control = pml.control(epsilon = 1e-8, # add data for internal use parent.frame(n) for higher nestings -update.pmlNew <- function(object, ..., evaluate = TRUE) { - call <- object$call - if (is.null(call)) - stop("need an object with call component") - extras <- match.call(expand.dots = FALSE)$... - if (length(extras)) { - existing <- !is.na(match(names(extras), names(call))) - for (a in names(extras)[existing]) call[[a]] <- extras[[a]] - if (any(!existing)) { - call <- c(as.list(call), extras[!existing]) - call <- as.call(call) - } - } - if (evaluate) - eval(call, object, parent.frame()) - else call -} +# update.pmlNew <- function(object, ..., evaluate = TRUE) { +# call <- object$call +# if (is.null(call)) +# stop("need an object with call component") +# extras <- match.call(expand.dots = FALSE)$... +# if (length(extras)) { +# existing <- !is.na(match(names(extras), names(call))) +# for (a in names(extras)[existing]) call[[a]] <- extras[[a]] +# if (any(!existing)) { +# call <- c(as.list(call), extras[!existing]) +# call <- as.call(call) +# } +# } +# if (evaluate) +# eval(call, object, parent.frame()) +# else call +# } #' @export @@ -793,7 +809,8 @@ update.pml <- function(object, ...) { if (!is.na(existing[9])) { model <- match.arg(eval(extras[[existing[9]]], parent.frame()), .aamodels) - getModelAA(model, bf = is.na(existing[3]), Q = is.na(existing[4])) + getModelAA(model, bf = is.na(existing[3]), Q = is.na(existing[4]), + has_gap_state = has_gap_state(data)) updateEig <- TRUE } else model <- object$model @@ -989,7 +1006,7 @@ pml.fit4 <- function(tree, data, bf = rep(1 / length(levels), length(levels)), if (ASC) { ind <- seq_len(nc) p0 <- sum(exp(siteLik[ind])) - if(is.nan(log(1 - p0))) browser() + if(p0 >= 1) stop("Error Mkv") loglik <- loglik - sum(weight) * log(1 - p0) } if (!site) return(loglik) @@ -1039,8 +1056,16 @@ pml.fit4 <- function(tree, data, bf = rep(1 / length(levels), length(levels)), #' @references Felsenstein, J. (1981) Evolutionary trees from DNA sequences: a #' maximum likelihood approach. \emph{Journal of Molecular Evolution}, #' \bold{17}, 368--376. +#' @examples +#' data(Laurasiatherian) +#' tree <- NJ(dist.ml(Laurasiatherian)) +#' bf <- rep(0.25, 4) +#' eig <- edQt() +#' pml.init(Laurasiatherian) +#' pml.fit(tree, Laurasiatherian, bf=bf, eig=eig) +#' pml.free() +#' pml(tree, Laurasiatherian) |> logLik() #' @keywords cluster -#' #' @rdname pml.fit #' @export pml.fit pml.fit <- function(tree, data, bf = rep(1 / length(levels), length(levels)), @@ -1354,7 +1379,8 @@ pml <- function(tree, data, bf = NULL, Q = NULL, inv = 0, k = 1, shape = 1, nc <- as.integer(attr(data, "nc")) if (type == "AA" & !is.null(model)) { model <- match.arg(model, .aamodels) - getModelAA(model, bf = is.null(bf), Q = is.null(Q)) + getModelAA(model, bf = is.null(bf), Q = is.null(Q), + has_gap_state = has_gap_state(data)) } if (type == "CODON") { .syn <- synonymous_subs(code=attr(data, "code")) @@ -1366,8 +1392,12 @@ pml <- function(tree, data, bf = NULL, Q = NULL, inv = 0, k = 1, shape = 1, model <- match.arg(model, .usermodels) if(model=="ORDERED") Q <- subsChoice_USER("ORDERED", nc)$Q } - if (is.null(bf)) - bf <- rep(1 / length(levels), length(levels)) + if (is.null(bf)){ + if(has_gap_state(data)){ + bf <- baseFreq(data) + bf[-nc] <- (1 - bf[nc]) / (nc-1) + } else bf <- rep(1 / nc, nc) + } if (is.character(bf)) { bf_choice <- match.arg(bf, c("equal", "empirical", "F1x4", "F3x4", "F61")) if (bf_choice == "F3x4" & type != "CODON") @@ -1479,14 +1509,14 @@ optimRooted <- function(tree, data, bf, g, w, eig, ll.0, loglik } # scale whole tree - scaleEdges <- function(tree, data, tau = 1e-8, ...) { #t = 1, trace = 0, + scaleEdges <- function(tree, data, tau = 1e-8, ...) { fn <- function(t, tree, data, ...) { tree$edge.length <- tree$edge.length * t pml.fit4(tree, data, ...) } min_scaler <- max(.25, tau / min(tree$edge.length) ) min_scaler <- min(min_scaler, 1) - if(min_scaler>1) browser() +# if(min_scaler>1) browser() optimize(f = fn, interval = c(min_scaler, 4), tree = tree, data = data, ..., maximum = TRUE, tol = .00001) } @@ -1949,8 +1979,8 @@ rooted.nni <- function(tree, data, eig, w, g, bf, rate, ll.0, INV, RELL=NULL, # if(aLRT) return(support_quartet) ll2 <- pml.fit4(tree, data, bf=bf, eig=eig, ll.0=ll.0, w=w, g=g, ...) eps <- (ll - ll2) / ll2 - if (control$trace > 1) cat(ll, " -> ", ll2, "\n") - if (control$trace > 1) cat("swap:", nchanges) + if (control$trace > 1) cat("optimize topology: ", ll, "-->", ll2, + " NNI moves: ", nchanges, "\n") ll <- ll2 iter <- iter + 1 } @@ -1987,6 +2017,70 @@ updateRates <- function(res, ll, rate, shape, k, inv, wMix, update="rate", } + +# help functions inside optim.pml +.ratchet_fun <- function(tree, data, ...){ + weight <- attr(data, "weight") + v <- rep(seq_along(weight), weight) + attr(data, "weight") <- tabulate(sample(v, replace = TRUE), + length(weight)) + res <- opt_Edge(tree, data, ...) + ll2 <- res[[2]] + opt_nni(tree, data, iter_max=5, trace=0, ll=ll2, ...)$tree +} + + +.di2multi2di <- function(tree2, tau, method){ + # tree2 <- di2multi(tree, tol = 10 * tau, tip2root = TRUE) # raus + if (!is.binary(tree2)) { + tree2 <- multi2di(tree2) + if(method=="unrooted" & is.rooted(tree2)) tree2 <- unroot(tree2) + tree2 <- minEdge(tree2, 3*tau, method=="ultrametric") + } + attr(tree2, "order") <- NULL + tree2 <- reorder(tree2, "postorder") + tree2 +} + + +.stochastic_fun <- function(tree, dm, tau, method, tip.dates, ratchet.par){ + tree <- di2multi(tree, tol = 10 * tau, tip2root = TRUE) + tree <- .di2multi2di(tree, tau, method) + tree <- reorder(tree, "postorder") + nTips <- Ntip(tree) + tree <- rNNI(tree, moves = round(nTips * ratchet.par$prop), n = 1) + if(method=="unrooted") return(tree) + if(method=="ultrametric"){ + tree <- nnls.tree(dm, tree) + tree <- midpoint(tree) + } + if(method=="tipdated") tree <- rtt(tree, tip.dates = tip.dates) + tree <- nnls.tree(dm, tree, method = method, tip.dates=tip.dates) + tree <- minEdge(tree, 5*tau, method=="ultrametric") + attr(tree, order) <- NULL + tree <- reorder(tree, "postorder") + tree +} + + +.updateQBF <- function(object, model, tmp){ + nc <- attr(object$data, "nc") + Q <- object$Q + bf <- object$bf + if(!tmp$optQ) Q <- rep(1, (nc*(nc-1L))/2) + if(model=="ORDERED") Q <- tmp$Q + if(!tmp$optBf){ + if(has_gap_state(object$data)){ + bf <- baseFreq(object$data) + bf[-nc] <- (1 - bf[nc]) / (nc-1) + } else bf <- rep(1 / nc, nc) + } + object <- update(object, Q=Q, bf=bf) + object +} + + + #' @rdname pml #' @aliases optim.pml #' @export @@ -2002,6 +2096,7 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, optNni <- ifelse(rearrangement == "none", FALSE, TRUE) perturbation <- ifelse(rearrangement %in% c("ratchet", "stochastic", "multi2di"), TRUE, FALSE) + if(rearrangement=="ratchet") fbs <- vector("list", ratchet.par$minit) extras <- match.call(expand.dots = FALSE)$... pmla <- c("wMix", "llMix") wMix <- object$wMix @@ -2033,7 +2128,6 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, addTaxa <- FALSE trace <- control$trace tau <- control$tau -# mit Zeile 2000 vereinheitlichen method <- "unrooted" is_ultrametric <- FALSE timetree <- FALSE @@ -2056,6 +2150,12 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, } } } + if(optRooted == FALSE && optEdge == TRUE){ + # rescale tree if edges are likely too long + pscore <- fitch(tree, data) / sum(attr(data, "weight")) + if(sum(tree$edge.length) > 3*pscore) + tree$edge.length <- tree$edge.length * 3*pscore / sum(tree$edge.length) + } if (optNni) { if(!timetree){ mapping <- map_duplicates(data) @@ -2063,6 +2163,8 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, orig.data <- data addTaxa <- TRUE tree <- drop.tip(tree, mapping[, 1]) + if(method=="unrooted") tree <- unroot(tree) + attr(tree, "order") <- NULL tree <- reorder(tree, "postorder") } } @@ -2118,15 +2220,20 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, nr <- as.integer(attr(data, "nr")) nc <- as.integer(attr(data, "nc")) if (type == "DNA" & optModel) { - tmp <- subsChoice(model) + # .optBFQ + tmp <- subsChoice(model, has_gap_state(data)) optQ <- tmp$optQ if (!optQ) { - Q <- rep(1, 6) + Q <- rep(1, (nc*(nc-1L))/2) object <- update.pml(object, Q = Q) } optBf <- tmp$optBf if (!optBf){ - bf <- c(0.25, 0.25, 0.25, 0.25) + if(has_gap_state(data)){ + bf <- baseFreq(data) + bf[-nc] <- (1 - bf[nc]) / (nc-1) + } else bf <- rep(1 / nc, nc) + #bf <- c(0.25, 0.25, 0.25, 0.25) } else bf <- baseFreq(data) object <- update.pml(object, bf = bf) subs <- tmp$subs @@ -2241,6 +2348,11 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, spl <- as.splits(bs) object$tree <- addConfidences(object$tree, spl) } + if(rearrangement=="ratchet"){ + class(fbs) <- "multiPhylo" + object$abs <- object$bs + object$bs <- fbs + } pml.free() return(object) }) @@ -2343,35 +2455,40 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, } if (optFreeRate) { # bis jetzt w nicht optimiert! - tmp_ll <- ll - res <- optimFreeRate(tree, data, g = g, k = k, w = w, inv = inv, + tmp_ll <- ll_fr <- ll + eps_fr <- 1e8 + iter_fr <- 0 + while(eps_fr > control$epsilon & iter_fr < 3){ + res <- optimFreeRate(tree, data, g = g, k = k, w = w, inv = inv, INV = INV, bf = bf, eig = eig, ll.0 = ll.0, rate = rate) - scale <- function(tree, g, w){ - blub <- sum(g * w) - g <- g / blub - tree$edge.length <- tree$edge.length * blub - list(tree=tree, g=g) - } - if(res[[2]] > ll){ - tmp_sc <- scale(tree, res[[1]], w) - g0 <- res[[1]] - blub <- sum(g0 * w) - g <- g0 / blub - tree$edge.length <- tree$edge.length * blub -## if (trace > 0) cat("optimize free rate parameters: ", ll, "-->", -## max(res[[2]], ll), "\n") - ll <- res[[2]] - } - res2 <- optimWs(tree, data, w = w, g=g, inv = inv, - INV = INV, bf = bf, eig = eig, - ll.0 = ll.0, rate = rate) - if(res2[[2]] > ll){ - w <- res2[[1]] - blub <- sum(g * w) - g <- g / blub - tree$edge.length <- tree$edge.length * blub - ll <- res2[[2]] +# scale <- function(tree, g, w){ +# blub <- sum(g * w) +# g <- g / blub +# tree$edge.length <- tree$edge.length * blub +# list(tree=tree, g=g) +# } + if(res[[2]] > ll){ +# tmp_sc <- scale(tree, res[[1]], w) + g0 <- res[[1]] + blub <- sum(g0 * w) + g <- g0 / blub + tree$edge.length <- tree$edge.length * blub + ll <- res[[2]] + } + res2 <- optimWs(tree, data, w = w, g=g, inv = inv, + INV = INV, bf = bf, eig = eig, + ll.0 = ll.0, rate = rate) + if(res2[[2]] > ll){ + w <- res2[[1]] + blub <- sum(g * w) + g <- g / blub + tree$edge.length <- tree$edge.length * blub + ll <- res2[[2]] + } + eps_fr <- ll - ll_fr + ll_fr <- ll + iter_fr <- iter_fr+1 } if (trace > 0) cat("optimize free rate parameters: ", tmp_ll, "-->", ll, "\n") @@ -2412,28 +2529,36 @@ optim.pml <- function(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, } for(i in seq_len(maxit)){ if(rearrangement == "stochastic"){ - tree2 <- di2multi(tree, tol = 10 * tau, tip2root = TRUE) - if (!is.binary(tree2)) { - tree2 <- multi2di(tree2) - if(!optRooted) tree2 <- unroot(tree2) - tree2 <- minEdge(tree2, tau) - tree2 <- reorder(tree2, "postorder") - } - tree2 <- rNNI(tree2, moves = round(nTips * ratchet.par$prop), n = 1) - if(optRooted){ - tree2 <- nnls.tree(dm, tree2, method = method, - tip.dates=tip.dates) - tree2 <- minEdge(tree2, 10*tau) - } + tree2 <- .stochastic_fun(tree, dm, tau, method, tip.dates, + ratchet.par) + +# tree2 <- di2multi(tree, tol = 10 * tau, tip2root = TRUE) +# if (!is.binary(tree2)) { +# tree2 <- multi2di(tree2) +# if(!optRooted) tree2 <- unroot(tree2) +# tree2 <- minEdge(tree2, tau) +# tree2 <- reorder(tree2, "postorder") +# } +# tree2 <- rNNI(tree2, moves = round(nTips * ratchet.par$prop), n = 1) +# if(optRooted){ +# tree2 <- nnls.tree(dm, tree2, method = method, tip.dates=tip.dates) +# tree2 <- minEdge(tree2, 10*tau) +# } } else if(rearrangement == "ratchet"){ - tree2 <- bootstrap.phyDat(data, candidate_tree, bs = 1, method=method, - eps = tau, bf = bf, Q = Q, k = k, shape = shape, - tip.dates=tip.dates)[[1]] - tree2 <- checkLabels(tree2, tree$tip.label) - tree2 <- reorder(tree2, "postorder") + tree2 <- .ratchet_fun(tree, data, rooted=optRooted, w = w, g = g, + eig = eig, bf = bf, inv=inv, + rate=rate, ll.0 = ll.0, INV = INV, llMix = llMix, + wMix=wMix, ASC=ASC, + control=list(eps=1e-08, maxit=3, trace=trace-1, tau=tau)) + fbs[[i]] <- tree2 +# tree2 <- bootstrap.phyDat(data, candidate_tree, bs = 1, method=method, +# eps = tau, bf = bf, Q = Q, k = k, shape = shape, +# tip.dates=tip.dates)[[1]] +# tree2 <- checkLabels(tree2, tree$tip.label) +# tree2 <- reorder(tree2, "postorder") } else if(rearrangement == "multi2di"){ tree2 <- di2multi(tree, tol=10*tau, tip2root=TRUE) - if(any(degree(tree2)>4)){ + if(any(tabulate(tree2$edge)>3)){ tree2 <- multi2di(tree2) if(!optRooted) tree2 <- unroot(tree2) tree2 <- minEdge(tree2, tau) @@ -2595,8 +2720,6 @@ optimQuartet <- function(tree, data, eig, w, g, bf, rate, ll.0, nTips, tau <- control$tau lg <- k ScaleEPS <- 1.0 / 4294967296.0 - # anc <- Ancestors(tree, 1:m, "parent") - # anc0 <- as.integer(c(0L, anc)) while (eps > control$eps && iter < control$maxit) { EL <- .Call("optQrtt", as.integer(parent), as.integer(child), eig, evi, @@ -2616,7 +2739,7 @@ optimQuartet <- function(tree, data, eig, w, g, bf, rate, ll.0, nTips, # if (control$trace > 1) cat(old.ll, " -> ", newll, "\n") old.ll <- newll } - if (control$trace > 0) cat(start.ll, " -> ", newll, "\n") +# if (control$trace > 0) cat(start.ll, " -> ", newll, "\n") list(tree = tree, logLik = newll, c(eps, iter)) } @@ -2630,10 +2753,6 @@ pml.quartet <- function(tree, data, bf = rep(.25, 4), k = 1, rate = 1, g, w, if (is.null(ll.0)) { ll.0 <- numeric(nr) } -# if (is.null(ind.ll0)) { -# ind <- which(ll.0 > 0) -# } -# else ind <- ind.ll0 node <- as.integer(tree$edge[, 1] - nTips - 1L) # min(node)) edge <- as.integer(tree$edge[, 2] - 1L) @@ -2704,7 +2823,6 @@ pml.nni <- function(tree, data, w, g, eig, bf, ll.0, ll, inv, wMix, llMix, tree0 <- index2tree(INDEX[i, ], tree, nTips + 1L) ch <- ei[5] pa <- ei[6] - # move up while (pa != loli) { tmpr <- match(loli, INDEX[, 5]) @@ -2736,7 +2854,6 @@ pml.nni <- function(tree, data, w, g, eig, bf, ll.0, ll, inv, wMix, llMix, ll.0 = ll.0, nTips = nTips, weight = weight, nr = nr, nc = nc, contrast = contrast, nco = nco, inv=inv, llcomp = ll + 1e-8, wMix=wMix, llMix=llMix, ...) - # new0$logLik+1e-8) new2 <- optimQuartet(tree2, data, eig = eig, w = w, g = g, bf = bf, ll.0 = ll.0, nTips = nTips, weight = weight, nr = nr, nc = nc, contrast = contrast, nco = nco, inv=inv, @@ -2825,7 +2942,7 @@ opt_nni <- function(tree, data, rooted, iter_max, trace, ll, RELL=NULL, ...){ } if(!is.null(RELL)) RELL <- tmp$RELL ll2 <- res$logLik - if(length(ll2)==0) browser() +# if(length(ll2)==0) browser() if(ll2 > (ll + 1e-8)) # epsR tree <- res$tree else { diff --git a/R/plotAnc.R b/R/plotAnc.R new file mode 100644 index 00000000..9ea7fca4 --- /dev/null +++ b/R/plotAnc.R @@ -0,0 +1,239 @@ +getTransition <- function(scheme, levels){ + l <- length(scheme$properties) + P <- matrix(0, length(levels), l, + dimnames = list(levels, names(scheme$properties))) + for(i in seq_along(scheme$properties)){ + ind <- match(scheme$properties[[i]], levels) + P[ind,i] <- 1 + } + P +} + + +getAncDF <- function(x){ + tmp <- x$data + contrast <- attr(tmp, "contrast") + attr(tmp, "contrast") <- contrast / rowSums(contrast) + tmp <- new2old.phyDat(tmp) + df <- list2df_ancestral(tmp, x$data) + if(!identical(colnames(x$prob), colnames(df))){ + if(identical(toupper(colnames(x$prob)), toupper(colnames(df)))){ + colnames(x$prob) <- colnames(df) + } else stop("Invalid prob object") + } + rbind(df, x$prob) +} + + +#' Plot ancestral character on a tree +#' +#' \code{plotAnc} plots a phylogeny and adds character to the nodes. Either +#' takes output from \code{ancestral.pars} or \code{ancestral.pml} or from an +#' alignment where there are node labels in the tree match the constructed +#' sequences in the alignment. +#' +#' For further details see vignette("Ancestral"). +#' +## @param tree a tree, i.e. an object of class pml or an object of class +## ancestral. +#' @param x an object of class \code{ancestral}. +## @param site.pattern logical, plot i-th site pattern or i-th site +#' @param i plots the i-th site. +## ,site +## @param which either "pie" or "seqlogo" +#' @param node to plot for which the probabilities should be plotted. +#' @param type a character string specifying the type of phylogeny to be drawn; +#' it must be one of "phylogram" (the default), "cladogram", "fan", "unrooted", +#' "radial", "tidy", or any unambiguous abbreviation of these. +#' @param start start position to plot. +#' @param end end position to plot. +#' @param col a vector containing the colors for all possible states. +#' @param cex.pie a numeric defining the size of the pie graphs. +#' @param pos a character string defining the position of the legend. +#' @param scheme a predefined color scheme. For amino acid options are "Ape_AA", +#' "Zappo_AA", "Clustal", "Polarity" and "Transmembrane_tendency", for +#' nucleotides "Ape_NT" and"RY_NT". Names can be abbreviated. +#' @param \dots Further arguments passed to or from other methods. +#' @returns \code{plotAnc} returns silently x. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}}, +#' \code{\link[ape]{image.DNAbin}}, \code{\link[ape]{image.AAbin}} +#' \code{\link[ggseqlogo]{ggseqlogo}} +#' @keywords plot +#' @examples +#' +#' example(NJ) +#' # generate node labels to ensure plotting will work +#' tree <- makeNodeLabel(tree) +#' anc.p <- ancestral.pars(tree, Laurasiatherian) +#' # plot the third character +#' plotAnc(anc.p, 3, pos="topright") +#' plotSeqLogo(anc.p, node="Node10", 1, 25) +#' +#' data(chloroplast) +#' tree <- pratchet(chloroplast, maxit=10, trace=0) +#' tree <- makeNodeLabel(tree) +#' anc.ch <- ancestral.pars(tree, chloroplast) +#' image(as.phyDat(anc.ch)[, 1:25]) +#' plotAnc(anc.ch, 21, scheme="Ape_AA", pos="topleft") +#' plotAnc(anc.ch, 21, scheme="Clustal", pos="topleft") +#' plotSeqLogo(anc.ch, node="Node1", 1, 25, scheme="Clustal") +#' @importFrom grDevices hcl.colors +#' @importFrom ggseqlogo make_col_scheme ggseqlogo +#' @rdname plot.ancestral +#' @export +plotAnc <- function(x, i = 1, type="phylogram", ..., col = NULL, + cex.pie = .5, pos = "bottomright", scheme=NULL) { + stopifnot(inherits(x, "ancestral")) + type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", + "radial", "tidy")) + phylo_clado <- type %in% c("phylogram", "cladogram") + df <- getAncDF(x) + data <- x$data + tree <- x$tree + subset <- df[,"Site"] == i + Y <- df[subset & !is.na(subset),] +# Y <- subset(df, Site==i) + y <- as.matrix(Y[, -c(1:3)]) + # y <- y[, -c(1:3)] + colnames(y) <- gsub("p_", "", colnames(y)) + row.names(y) <- Y$Node + y <- y[c(tree$tip.label, tree$node.label), ] + if(is.null(tree$node.label) || any(is.na(match(tree$node.label, rownames(y)))) || + is.numeric(tree$node.label)) + tree <- makeNodeLabel(tree) + + if(any(is.na(match(c(tree$tip.label, tree$node.label), rownames(y))))) + stop("Tree needs nodelabel, which match the labels of the alignment!") + CEX <- cex.pie + xrad <- CEX * diff(par("usr")[1:2]) / 50 + levels <- attr(data, "levels") + nc <- attr(data, "nc") + if(is.null(scheme) & attr(data, "type")=="AA") scheme <- "Ape_AA" + if(is.null(scheme) & attr(data, "type")=="DNA") scheme <- "Ape_NT" + if(!is.null(scheme)){ + scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", + "Transmembrane_tendency", "Ape_NT", "RY_NT")) + sc <- get(scheme, environment(ace)) + if(has_gap_state(data) && attr(data, "type")=="AA"){ + sc$properties <- c(sc$properties, Gap="-") + sc$color <- c(sc$color, "#FFFFFF") + } + if(attr(data, "type")=="DNA"){ + ind <- match("n", names(sc$properties)) + if(!is.na(ind)){ + sc$properties <- sc$properties[-ind] + sc$color <- sc$color[-ind] + } + } + P <- getTransition(sc, levels) + y <- y %*% P + levels <- colnames(P) + col <- sc$col + nc <- ncol(y) + } + plot(tree, label.offset = 1.1 * xrad, plot = FALSE, type=type, ...) + lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) + XX <- lastPP$xx + YY <- lastPP$yy + xrad <- CEX * diff(lastPP$x.lim * 1.1) / 50 + par(new = TRUE) + plot(tree, label.offset = 1.1 * xrad, plot = TRUE, type=type, ...) + if (is.null(col)) col <- hcl.colors(nc) #rainbow(nc) + if(!is.null(names(col))) col <- col[attr(data, "levels")] + if (length(col) != nc) { + warning("Length of color vector differs from number of levels!") + } + BOTHlabels( + pie = y, XX = XX, YY = YY, adj = c(0.5, 0.5), frame = "rect", pch = NULL, + sel = seq_along(XX), thermo = NULL, piecol = col, col = "black", + bg = "lightblue", horiz = FALSE, width = NULL, height = NULL, cex = cex.pie + ) +# if(legend) legend(pos, legend=levels, pch=21, pt.bg = col) + if (!is.null(pos)) legend(pos, legend=levels, pch=21, pt.bg = col) + invisible(x) +} + + +my_ggseqlogo <-function (data, facet = "wrap", scales = "free_x", ncol = NULL, + nrow = NULL, start=NULL, end=NULL, ...) +{ + x <- geom_logo(data = data, ...) + x[[2]] <- scale_x_continuous(limits = c(start-0.5, end+.5) , + breaks=pretty(seq(start, end))) + p <- ggplot() + x + theme_logo() + if (!"list" %in% class(data)) return(p) + facet <- match.arg(facet, c("grid", "wrap")) + if (facet == "grid") { + p <- p + facet_grid(~seq_group, scales = scales) + } + else if (facet == "wrap") { + p <- p + facet_wrap(~seq_group, scales = scales, nrow = nrow, ncol = ncol) + } + return(p) +} + + +#' @rdname plot.ancestral +#' @importFrom ggplot2 scale_x_continuous ggplot facet_grid facet_wrap +#' @importFrom ggseqlogo geom_logo theme_logo +#' @returns \code{plotSeqLogo} returns a ggplot object. +#' @export +plotSeqLogo <- function(x, node=getRoot(x$tree), start=1, end=10, scheme="Ape_NT", ...){ + stopifnot(inherits(x, "ancestral")) + type <- attr(x$data, "type") + tree <- x$tree + df <- getAncDF(x) + nodes <- c(tree$tip.label, tree$node.label) + if(is.numeric(node)) node <- nodes[node] + subset <- df[,"Node"] == node + X <- df[subset & !is.na(subset),] +# X2 <- subset(df, subset=Node==node) + end <- min(end, nrow(X)) + X <- X[seq_len(end), , drop=FALSE] # creating the whole plot is slow + X <- t(as.matrix(X[, -c(1:3)])) + tmp <- gsub("p_", "", rownames(X)) + lev <- rownames(X) <- toupper(tmp) + if(is.null(scheme) & type=="AA") scheme <- "Ape_AA" + if(is.null(scheme) & type=="DNA") scheme <- "Ape_NT" + if(!is.null(scheme)){ + scheme <- match.arg(scheme, c("Ape_AA", "Zappo_AA", "Clustal", "Polarity", + "Transmembrane_tendency", "Ape_NT", "RY_NT")) + sc <- get(scheme, environment(ace)) + if(has_gap_state(x$data) && type=="AA"){ + sc$properties <- c(sc$properties, Gap="-") + sc$color <- c(sc$color, "#FFFFFF") + } + l <- lengths(sc$properties) + SC <- make_col_scheme(chars = toupper(unlist(sc$properties)), + groups = rep(names(sc$properties), l), + cols=rep(sc$color, l)) + + } + else SC <- make_col_scheme(chars=lev, cols= hcl.colors(length(lev))) + my_ggseqlogo(X, col_scheme=SC, method='p', start=start, end=end) +} + + +##' @rdname plot.ancestral +##' @export +#image.ancestral <- function(x, ...){ +# tmp <- rbind(x$"data", x$"state") +# image(tmp, ...) +#} + +##' @rdname plot.ancestral +##' @export +#plot.ancestral <- function(x, which = c("pie", "seqlogo"), site = 1, +# node=getRoot(x$tree), col = NULL, cex.pie = .5, pos = "bottomright", +# scheme=NULL, start=1, end=10, ...){ +# stopifnot(inherits(x, "ancestral")) +# which <- match.arg(which, c("pie", "seqlogo"), TRUE) +# if(which=="pie")plotAnc(x, i = site, col = col, cex.pie = cex.pie, pos = pos, +# scheme=scheme, ...) +# if(which=="seqlogo")plotSeqLogo(x, node, start=start, end=end, scheme=scheme, ...) +#} + + + + diff --git a/R/plotBS.R b/R/plotBS.R new file mode 100644 index 00000000..5588f297 --- /dev/null +++ b/R/plotBS.R @@ -0,0 +1,177 @@ +support <- function(tree, trees, method="FBP", tol=2e-8, scale=TRUE){ + trees <- keep.tip(trees, tree$tip.label) + method <- match.arg(method, c("FBP", "TBE", "MCC"), several.ok=TRUE) + multi <- ifelse(length(method)>1, TRUE, FALSE) + tip2root <- ifelse(method=="MCC", TRUE, FALSE) + if(all(sapply(trees, \(x)!is.null(x$edge.length)))){ + trees <- di2multi(trees, tol=tol) # , tip2root=tip2root) + } + if(multi) X <- matrix(NA, Nnode(tree), length(method), + dimnames = list(NULL, method)) + if("MCC" %in% method){ + trees <- .uncompressTipLabel(trees) # check if needed + if(any(!is.rooted(trees))) + stop("All trees need to be rooted for method 'MCC'!") + x <- prop.clades(tree, trees) + x <- (x / length(trees)) + if(!scale) x <- x * 100 + if(multi) X[, "MCC"] <- x + } + if("FBP" %in% method){ + trees <- .uncompressTipLabel(trees) # check if needed + if (any(is.rooted(trees))) trees <- unroot(trees) + x <- prop.clades(tree, trees) + x <- (x / length(trees)) + if(!scale) x <- x * 100 + if(multi) X[, "FBP"] <- x + } + if("TBE" %in% method){ + x <- transferBootstrap(tree, trees, FALSE, scale=scale) + if(multi) X[, "TBE"] <- x + } + if(multi) return(X) + x +} + + + +#' Plotting trees with bootstrap values +#' +#' \code{plotBS} plots a phylogenetic tree with the bootstrap values assigned +#' to the (internal) edges. It can also used to assign bootstrap values to a +#' phylogenetic tree. \code{add_support} adds support values to a plot. +#' +#' The functions can either assign the classical Felsenstein’s bootstrap +#' proportions (FBP) (Felsenstein (1985), Hendy & Penny (1985)) or the +#' transfer bootstrap expectation (TBE) of Lemoine et al. (2018). Using the +#' option \code{type=="n"} just assigns the bootstrap values and return the tree +#' without plotting it. +#' +#' +#' @param tree The tree on which edges the bootstrap values are plotted. +#' @param trees a list of trees (object of class "multiPhylo"). +#' @param type the type of tree to plot, one of "phylogram", "cladogram", "fan", +#' "unrooted", "radial" or "none". If type is "none" the tree is returned with +#' the bootstrap values assigned to the node labels. +#' @param method either "FBP" the classical bootstrap (default), "TBE" +#' (transfer bootstrap) or "MCC" for assigning clade credibilities. In case of +#' "MCC" all trees need to be rooted. +#' @param bs.col color of bootstrap support labels. +#' @param bs.adj one or two numeric values specifying the horizontal and +#' vertical justification of the bootstrap labels. +#' @param digits integer indicating the number of decimal places. +#' @param p only plot support values higher than this percentage number +#' (default is 0). +#' @param sep seperator between the different methods. +#' @param \dots further parameters used by \code{plot.phylo}. +#' @param frame a character string specifying the kind of frame to be printed +#' around the bootstrap values. This must be one of "none" (the default), +#' "rect" or "circle". +#' @param tol a numeric value giving the tolerance to consider a branch length +#' significantly greater than zero. +#' @param scale return ratio or percentage. +#' @return \code{plotBS} returns silently a tree, i.e. an object of class +#' \code{phylo} with the bootstrap values as node labels. The argument +#' \code{trees} is optional and if not supplied the labels supplied +#' in the \code{node.label} slot will be used. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link{plot.phylo}}, \code{\link{add_ci}}, +#' \code{\link{nodelabels}}, +#' \code{\link{prop.clades}}, \code{\link{maxCladeCred}}, +#' \code{\link{transferBootstrap}}, \code{\link{consensus}}, +#' \code{\link{consensusNet}} +#' @references Felsenstein J. (1985) Confidence limits on phylogenies. An +#' approach using the bootstrap. \emph{Evolution} \bold{39}, 783--791 +#' +#' Lemoine, F., Entfellner, J. B. D., Wilkinson, E., Correia, D., Felipe, M. D., +#' De Oliveira, T., & Gascuel, O. (2018). Renewing Felsenstein’s phylogenetic +#' bootstrap in the era of big data. \emph{Nature}, \bold{556(7702)}, 452--456. +#' +#' Penny D. and Hendy M.D. (1985) Testing methods evolutionary tree +#' construction. \emph{Cladistics} \bold{1}, 266--278 +#' +#' Penny D. and Hendy M.D. (1986) Estimating the reliability of evolutionary +#' trees. \emph{Molecular Biology and Evolution} \bold{3}, 403--417 +#' @examples +#' fdir <- system.file("extdata/trees", package = "phangorn") +#' # RAxML best-known tree with bipartition support (from previous analysis) +#' raxml.tree <- read.tree(file.path(fdir,"RAxML_bipartitions.woodmouse")) +#' # RAxML bootstrap trees (from previous analysis) +#' raxml.bootstrap <- read.tree(file.path(fdir,"RAxML_bootstrap.woodmouse")) +#' par(mfrow=c(1,2)) +#' plotBS(raxml.tree, raxml.bootstrap, "p") +#' plotBS(raxml.tree, raxml.bootstrap, "p", "TBE") +#' @rdname plotBS +#' @export +plotBS <- function(tree, trees, type = "phylogram", method="FBP", + bs.col = "black", bs.adj = NULL, digits=3, p = 0, + frame = "none", tol=1e-6, sep = "/", ...) { + type <- match.arg(type, c("phylogram", "cladogram", "fan", "unrooted", + "radial", "none")) + if(inherits(tree, "pml")) tree <- tree$tree + if(!inherits(tree, "phylo")) stop("tree must be of class phylo") +# method <- match.arg(method, c("FBP", "TBE", "MCC"), several.ok=TRUE) +# wird in support gecheckt + if (hasArg(trees)) { + x <-support(tree, trees, method=method, tol=tol) + x <- round(x, digits=digits) + if(length(method)>1) x <- apply(x, 1, paste, collapse=sep) + tree$node.label <- x + } + else { + if (is.null(tree$node.label)) stop("You need to supply 'trees' or the tree needs support-values as node.label") + x <- tree$node.label + } + if(type=="none") return( tree ) + plot(tree, type = type, ...) + + label <- c(rep(0, length(tree$tip.label)), x) + ind <- get("last_plot.phylo", envir = .PlotPhyloEnv)$edge[ ,2 ] + if (type == "phylogram" | type == "cladogram") { + root <- getRoot(tree) + label <- c(rep(0, length(tree$tip.label)), x) + label[root] <- 0 + ind <- which(label > p) + if (is.null(bs.adj)) { + bs.adj <- c(1, 1) + } + if (length(ind) > 0) { + if(is.numeric(label)) label <- round(label, digits = digits) + nodelabels( + text = label[ind], node = ind, + frame = frame, col = bs.col, adj = bs.adj, ... + ) + } + } + else { + if (is.null(bs.adj)) { + bs.adj <- c(0.5, 0.5) + } + ind2 <- which(label[ind] > p) + if (length(ind2 > 0)) { + if(is.numeric(label)) label <- round(label, digits = digits) + edgelabels(label[ind][ind2], ind2, + frame = frame, + col = bs.col, adj = bs.adj, ... + ) + } + } + invisible(tree) +} + + +#' @rdname plotBS +#' @export +add_support <- function(tree, trees, method="FBP", tol=1e-8, + scale=TRUE, frame="none", digits=3, sep="/", ...){ + lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) +# if(!all.equal(lastPP$edge, reorder(tree)$edge)) +# stop("tree seems to differ from the last plot!") + x <- support(tree, trees, method=method, tol=tol, scale=scale) + x <- round(x, digits=digits) + if(length(method)>1) x <- apply(x, 1, paste, collapse=sep) + drawSupportOnEdges(x, frame=frame, ...) +} + + + diff --git a/R/plot_networx.R b/R/plot_networx.R new file mode 100644 index 00000000..70de35f9 --- /dev/null +++ b/R/plot_networx.R @@ -0,0 +1,575 @@ +# some trigonemetric functions +rad2deg <- function(rad) (rad * 180) / (pi) +deg2rad <- function(deg) (deg * pi) / (180) + +# circular mean +# https://en.wikipedia.org/wiki/Mean_of_circular_quantities +circ.mean <- function(deg) { + rad.m <- (deg * pi) / (180) + mean.cos <- mean(cos(rad.m)) + mean.sin <- mean(sin(rad.m)) + + theta <- rad2deg(atan(mean.sin / mean.cos)) + if (mean.cos < 0) theta <- theta + 180 + if ((mean.sin < 0) & (mean.cos > 0)) theta <- theta + 360 + theta +} + + +spl2angle <- function(x) { + l <- length(attr(x, "labels")) + ord <- 1:l + if (!is.null(attr(x, "cycle"))) ord <- attr(x, "cycle") + x <- changeOrder(x, attr(x, "labels")[ord]) + y <- lapply(x, function(x, l) (x - 1) / l * 360, l = l) + angle <- vapply(y, circ.mean, 0) |> deg2rad() + # angle <- ((vapply(x, sum, 0) / lengths(x) - 1) / l ) * 2*pi + # kreis2kart(attr(x, "weight"), angle) + angle +} + + +coords.equal.angle <- function(obj) { + if (is.null(attr(obj, "order")) || (attr(obj, "order") == "postorder")) + obj <- reorder.networx(obj) + spl <- obj$splits + spl <- SHORTwise(spl) #, length(obj$tip.label)) + l <- length(obj$edge.length) + # ind1 <- which(!duplicated(obj$splitIndex)) + n <- max(obj$edge) + angle <- spl2angle(spl) + weight <- attr(spl, "weight") + k <- matrix(0, max(obj$splitIndex), 2) + + res <- matrix(0, max(obj$edge), 2) + for (i in 1:l) { # unique(obj$splitIndex) + j <- obj$edge[i, 1] + m <- obj$edge[i, 2] + p <- obj$splitIndex[i] + res[m, ] <- res[j, ] + kreis2kart(weight[p], angle[p]) + } + res +} + + +#' @rdname phangorn-internal +#' @export +coords <- function(obj, dim = "3D") { + # if(is.null(attr(obj,"order")) || (attr(obj, "order")=="postorder") ) + # obj = reorder.networx(obj) + if (dim == "equal_angle") return(coords.equal.angle(obj)) + + l <- length(obj$edge.length) + ind1 <- which(!duplicated(obj$splitIndex)) + + n <- max(obj$edge) + adj <- spMatrix(n, n, i = obj$edge[, 2], j = obj$edge[, 1], + x = rep(1, length(obj$edge.length))) + g <- graph_from_adjacency_matrix(adj, "undirected") + if (dim == "3D") { + coord <- layout_nicely(g, dim = 3) + k <- matrix(0, max(obj$splitIndex), 3) + for (i in ind1) { + tmp <- coord[obj$edge[i, 2], ] - coord[obj$edge[i, 1], ] + k[obj$splitIndex[i], ] <- kart2kugel(tmp[1], tmp[2], tmp[3]) + } + k[obj$splitIndex[ind1], 1] <- obj$edge.length[ind1] + + res <- matrix(0, vcount(g), 3) + for (i in 1:l) { + j <- obj$edge[i, 1] + m <- obj$edge[i, 2] + p <- obj$splitIndex[i] + res[m, ] <- res[j, ] + kugel2kart(k[p, 1], k[p, 2], k[p, 3]) + } + } + else { + coord <- layout_nicely(g, dim = 2) + k <- matrix(0, max(obj$splitIndex), 2) + tmp <- coord[obj$edge[ind1, 2], ] - coord[obj$edge[ind1, 1], ] + k[obj$splitIndex[ind1], ] <- kart2kreis(tmp[,1], tmp[,2]) + # for (i in ind1) { + # tmp <- coord[obj$edge[i, 2], ] - coord[obj$edge[i, 1], ] + # k[obj$splitIndex[i], ] <- kart2kreis(tmp[1], tmp[2]) + # } + k[obj$splitIndex[ind1], 1] <- obj$edge.length[ind1] + res <- matrix(0, vcount(g), 2) + for (i in 1:l) { + j <- obj$edge[i, 1] + m <- obj$edge[i, 2] + p <- obj$splitIndex[i] + res[m, ] <- res[j, ] + kreis2kart(k[p, 1], k[p, 2]) + } + } + res +} + + +kart2kugel <- function(x, y, z) { + r <- sqrt(x * x + y * y + z * z) + alpha <- atan(sqrt(x * x + y * y) / z) + if (z < 0) alpha <- alpha + pi + beta <- atan(y / x) + if (x < 0) beta <- beta + pi + c(r, alpha, beta) +} + + +kart2kreis <- function(x, y) { + r <- sqrt(x * x + y * y) + alpha <- atan(y / x) + #if (x < 0) alpha <- alpha + pi + if (any(x < 0)) alpha[x < 0] <- alpha[x < 0] + pi + cbind(r, alpha) +} + + +kreis2kart <- function(r, alpha) { + c(r * cos(alpha), r * sin(alpha)) + # if(length(r)>1) return(matrix(c(r*cos(alpha), r*sin(alpha)), ncol=2)) + # else return(c(r*cos(alpha), r*sin(alpha))) +} + + +kugel2kart <- function(r, alpha, beta) { + x <- r * sin(alpha) * cos(beta) + y <- r * sin(alpha) * sin(beta) + z <- r * cos(alpha) + c(x, y, z) +} + + +edgeLabels <- function(xx, yy, zz = NULL, edge) { + XX <- (xx[edge[, 1]] + xx[edge[, 2]]) / 2 + YY <- (yy[edge[, 1]] + yy[edge[, 2]]) / 2 + if (!is.null(zz)) { + ZZ <- (zz[edge[, 1]] + zz[edge[, 2]]) / 2 + return(cbind(XX, YY, ZZ)) + } + cbind(XX, YY) +} + + +rotate_matrix <- function(x, theta){ + rot_matrix <- matrix(c(cos(theta), sin(theta), -sin(theta), cos(theta)), + 2, 2, byrow = TRUE) + x %*% rot_matrix +} + + +#' plot phylogenetic networks +#' +#' So far not all parameters behave the same on the the \code{rgl} \code{"3D"} +#' and basic graphic \code{"2D"} device. +#' +#' Often it is easier and safer to supply vectors of graphical parameters for +#' splits (e.g. splits.color) than for edges. These overwrite values edge.color. +#' +#' @param x an object of class \code{"networx"} +#' @param type "3D" to plot using rgl or "equal angle" and "2D" in the normal +#' device. +#' @param use.edge.length a logical indicating whether to use the edge weights +#' of the network to draw the branches (the default) or not. +#' @param show.tip.label a logical indicating whether to show the tip labels on +#' the graph (defaults to \code{TRUE}, i.e. the labels are shown). +#' @param show.edge.label a logical indicating whether to show the tip labels +#' on the graph. +#' @param edge.label an additional vector of edge labels (normally not needed). +#' @param show.node.label a logical indicating whether to show the node labels +#' (see example). +#' @param node.label an additional vector of node labels (normally not needed). +#' @param show.nodes a logical indicating whether to show the nodes (see +#' example). +#' @param tip.color the colors used for the tip labels. +#' @param edge.color the colors used to draw edges. +#' @param edge.width the width used to draw edges. +#' @param edge.lty a vector of line types. +#' @param split.color the colors used to draw edges. +#' @param split.width the width used to draw edges. +#' @param split.lty a vector of line types. +#' @param font an integer specifying the type of font for the labels: 1 (plain +#' text), 2 (bold), 3 (italic, the default), or 4 (bold italic). +#' @param cex a numeric value giving the factor scaling of the labels. +#' @param cex.node.label a numeric value giving the factor scaling of the node +#' labels. +#' @param cex.edge.label a numeric value giving the factor scaling of the edge +#' labels. +#' @param col.node.label the colors used for the node labels. +#' @param col.edge.label the colors used for the edge labels. +#' @param font.node.label the font used for the node labels. +#' @param font.edge.label the font used for the edge labels. +#' @param underscore a logical specifying whether the underscores in tip labels +#' should be written as spaces (the default) or left as are (if TRUE). +#' @param angle rotate the plot. +#' @param digits if edge labels are numerical a positive integer indicating how +#' many significant digits are to be used. +#' @param \dots Further arguments passed to or from other methods. +#' @returns \code{plot.networx} returns invisibly a list with paramters of the +#' plot. +#' @rdname plot.networx +#' @note The internal representation is likely to change. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link{consensusNet}}, \code{\link{neighborNet}}, +#' \code{\link{splitsNetwork}}, \code{\link{hadamard}}, +#' \code{\link{distanceHadamard}}, \code{\link{as.networx}}, +#' \code{\link[ape]{evonet}}, \code{\link[ape]{as.phylo}}, +#' \code{\link{densiTree}}, \code{\link[ape]{nodelabels}} +#' @references Dress, A.W.M. and Huson, D.H. (2004) Constructing Splits Graphs +#' \emph{IEEE/ACM Transactions on Computational Biology and Bioinformatics +#' (TCBB)}, \bold{1(3)}, 109--115 +#' +#' Schliep, K., Potts, A. J., Morrison, D. A. and Grimm, G. W. (2017), +#' Intertwining phylogenetic trees and networks. \emph{Methods Ecol Evol}. +#' \bold{8}, 1212--1220. doi:10.1111/2041-210X.12760 +#' @keywords plot +#' @importFrom igraph make_graph +#' @examples +#' +#' set.seed(1) +#' tree1 <- rtree(20, rooted=FALSE) +#' sp <- as.splits(rNNI(tree1, n=10)) +#' net <- as.networx(sp) +#' plot(net) +#' plot(net, direction="axial") +#' \dontrun{ +#' # also see example in consensusNet +#' example(consensusNet) +#' } +#' @importFrom igraph graph_from_adjacency_matrix vcount topo_sort layout_nicely +#' @method plot networx +#' @export +plot.networx <- function(x, type = "equal angle", use.edge.length = TRUE, + show.tip.label = TRUE, show.edge.label = FALSE, + edge.label = NULL, show.node.label = FALSE, + node.label = NULL, show.nodes = FALSE, + tip.color = "black", edge.color = "black", + edge.width = 3, edge.lty = 1, split.color = NULL, + split.width = NULL, split.lty = NULL, font = 3, + cex = par("cex"), cex.node.label = cex, + cex.edge.label = cex, col.node.label = tip.color, + col.edge.label = tip.color, font.node.label = font, + font.edge.label = font, underscore = FALSE, + angle=0, digits=3, ...) { + type <- match.arg(type, c("equal angle", "3D", "2D")) + if (use.edge.length == FALSE){ + x$edge.length[] <- 1 + attr(x$splits, "weight") <- rep(1, length(x$splits)) + } + nTips <- length(x$tip.label) + conf <- attr(x$splits, "confidences") + index <- x$splitIndex + if(!is.null(edge.label) && is.numeric(edge.label)) edge.label <- prettyNum(edge.label) + if (is.null(edge.label) && !is.null(conf)) { + conf <- conf[index] + if(is.numeric(conf)) conf <- prettyNum(format(conf, digits=digits)) + if (!is.null(x$translate)) conf[match(x$translate$node, x$edge[, 2])] <- "" + else conf[x$edge[, 2] <= nTips] <- "" + edge.label <- conf + } + if (is.null(node.label)) node.label <- as.character(1:max(x$edge)) + if (show.tip.label) node.label[1:nTips] <- "" + if (show.tip.label){ + if (is.expression(x$tip.label)) underscore <- TRUE + if (!underscore) x$tip.label <- gsub("_", " ", x$tip.label) + } + + lspl <- max(x$splitIndex) + if (!is.null(split.color)) { + if (length(split.color) != lspl) + stop("split.color must be same length as splits") + else edge.color <- split.color[x$splitIndex] + } + if (!is.null(split.width)) { + if (length(split.width) != lspl) + stop("split.color must be same length as splits") + else edge.width <- split.width[x$splitIndex] + } + if (!is.null(split.lty)) { + if (length(split.lty) != lspl) + stop("split.color must be same length as splits") + else edge.lty <- split.lty[x$splitIndex] + } + + chk <- FALSE + + if (type == "3D") chk <- requireNamespace("rgl", quietly = TRUE) + if (!chk && type == "3D") { + warning("type='3D' requires the package 'rgl', plotting in '2D' instead!\n") + type <- "2D" + } + # use precomputed vertices when available + coord <- NULL + if (!is.null(x$.plot)) coord <- x$.plot$vertices + + if (type == "3D") { + if (is.null(coord) || ncol(coord) != 3) + coord <- coords(x, dim = "3D") + plotRGL(coord, x, show.tip.label = show.tip.label, + show.edge.label = show.edge.label, edge.label = edge.label, + show.node.label = show.node.label, node.label = node.label, + show.nodes = show.nodes, tip.color = tip.color, edge.color = edge.color, + edge.width = edge.width, font = font, cex = cex, + cex.node.label = cex.node.label, cex.edge.label = cex.edge.label, + col.node.label = col.node.label, col.edge.label = col.edge.label, + font.node.label = font.node.label, font.edge.label = font.edge.label) + } + else { + if (is.null(coord) || ncol(coord) != 2) { + if (type == "equal angle") coord <- coords.equal.angle(x) + else coord <- coords(x, dim = "2D") + } + if(angle != 0){ + angle <- angle * pi/180 # + coord <- rotate_matrix(coord, angle) + } + plot2D(coord, x, show.tip.label = show.tip.label, + show.edge.label = show.edge.label, edge.label = edge.label, + show.node.label = show.node.label, node.label = node.label, + show.nodes = show.nodes, tip.color = tip.color, edge.color = edge.color, + edge.width = edge.width, edge.lty = edge.lty, font = font, cex = cex, + cex.node.label = cex.node.label, cex.edge.label = cex.edge.label, + col.node.label = col.node.label, col.edge.label = col.edge.label, + font.node.label = font.node.label, font.edge.label = font.edge.label, + add = FALSE, ...) + } + x$.plot <- list(vertices = coord, edge.color = edge.color, + edge.width = edge.width, edge.lty = edge.lty) + invisible(x) +} + + +plotRGL <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, + edge.label = NULL, show.node.label = FALSE, + node.label = NULL, show.nodes = FALSE, tip.color = "blue", + edge.color = "grey", edge.width = 3, font = 3, + cex = par("cex"), cex.node.label = cex, + cex.edge.label = cex, col.node.label = tip.color, + col.edge.label = tip.color, font.node.label = font, + font.edge.label = font, ...) { + open3d <- rgl::open3d + segments3d <- rgl::segments3d + spheres3d <- rgl::spheres3d + texts3d <- rgl::texts3d + + edge <- net$edge + + x <- coords[, 1] + y <- coords[, 2] + z <- coords[, 3] + + nTips <- length(net$tip.label) + + segments3d(x[t(edge)], y[t(edge)], z[t(edge)], + col = rep(edge.color, each = 2), lwd = edge.width) + radius <- 0 + if (show.nodes) { + radius <- sqrt( (max(x) - min(x))^2 + (max(y) - min(y))^2 + + (max(z) - min(z))^2) / 200 + spheres3d(x[1:nTips], y[1:nTips], z[1:nTips], radius = 2 * radius, + color = "cyan") + spheres3d(x[-c(1:nTips)], y[-c(1:nTips)], z[-c(1:nTips)], radius = radius, + color = "magenta") + } + if (show.tip.label) { + if (is.null(net$translate)) + texts3d(x[1:nTips] + 2.05 * radius, y[1:nTips], z[1:nTips], + net$tip.label, color = tip.color, cex = cex, font = font) + else + texts3d(x[net$translate$node] + 2.05 * radius, y[net$translate$node], + z[net$translate$node], net$tip.label, color = tip.color, cex = cex, + font = font) + } + if (show.edge.label) { + ec <- edgeLabels(x, y, z, edge) + if (is.null(edge.label)) edge.label <- net$splitIndex + # else edge.label = net$splitIndex + texts3d(ec[, 1], ec[, 2], ec[, 3], edge.label, color = col.edge.label, + cex = cex.edge.label, font = font.edge.label) + } + if (show.node.label) { + texts3d(x, y, z, node.label, color = col.node.label, cex = cex.node.label, + font = font.node.label) + } +} + + +plot2D <- function(coords, net, show.tip.label = TRUE, show.edge.label = FALSE, + edge.label = NULL, show.node.label = FALSE, + node.label = NULL, tip.color = "blue", edge.color = "grey", + edge.width = 3, edge.lty = 1, font = 3, cex = par("cex"), + cex.node.label = cex, cex.edge.label = cex, + col.node.label = tip.color, col.edge.label = tip.color, + font.node.label = font, font.edge.label = font, + add = FALSE, direction="horizontal", xlim=NULL, ylim=NULL, + ...) { + direction <- match.arg(direction, c("horizontal", "axial")) + edge <- net$edge + label <- net$tip.label + xx <- coords[, 1] + yy <- coords[, 2] + nTips <- length(label) + if(is.null(xlim)){ + xlim <- range(xx) + offset <- max(nchar(label)) * 0.018 * cex * diff(xlim) + if (show.tip.label) xlim <- c(xlim[1] - offset, xlim[2] + offset) + } + if(is.null(ylim)){ + ylim <- range(yy) + if (show.tip.label){ + if(direction=="axial"){ + offset <- max(nchar(label)) * 0.018 * cex * diff(ylim) + ylim <- c(ylim[1] - offset, ylim[2] + offset) + } else ylim <- c(ylim[1] - 0.03 * cex * diff(ylim), + ylim[2] + 0.03 * cex * diff(ylim)) + } + } + if (!add) { + plot.new() + plot.window(xlim, ylim, asp = 1) + } + cladogram.plot(edge, xx, yy, edge.color, edge.width, edge.lty) + if (show.tip.label) { + if (is.null(net$translate)) ind <- match(1:nTips, edge[, 2]) + else ind <- match(net$translate$node, edge[, 2]) + if(direction=="horizontal"){ + pos <- rep(4, nTips) + XX <- xx[edge[ind, 1]] - xx[edge[ind, 2]] + pos[XX > 0] <- 2 + YY <- yy[edge[ind, 1]] - yy[edge[ind, 2]] + pos2 <- rep(3, nTips) + pos2[YY > 0] <- 1 + # needed if tiplabels are not at internal nodes + XX[is.na(XX)] <- 0 + YY[is.na(YY)] <- 0 + pos[abs(YY) > abs(XX)] <- pos2[abs(YY) > abs(XX)] + if (is.null(net$translate)) text(xx[1:nTips], yy[1:nTips], labels = label, + pos = pos, col = tip.color, cex = cex, font = font) + else text(xx[net$translate$node], yy[net$translate$node], labels = label, + pos = pos, col = tip.color, cex = cex, font = font) + } + else { + XX <- xx[edge[ind, 2]] - xx[edge[ind, 1]] + YY <- yy[edge[ind, 2]] - yy[edge[ind, 1]] + angle <- kart2kreis(XX, YY)[,2] + adj <- abs(angle) > pi/2 + angle <- angle * 180/pi # switch to degrees + angle[adj] <- angle[adj] - 180 + adj <- as.numeric(adj) + ## `srt' takes only a single value, so can't vectorize this: + ## (and need to 'elongate' these vectors:) + font <- rep(font, length.out = nTips) + tip.color <- rep(tip.color, length.out = nTips) + cex <- rep(cex, length.out = nTips) + for (i in seq_along(label)) + text(xx[i], yy[i], label[i], font = font[i], + cex = cex[i], srt = angle[i], adj = adj[i], + col = tip.color[i]) + } + } + + if (show.edge.label) { + ec <- edgeLabels(xx, yy, edge = edge) + if (is.null(edge.label)) edge.label <- net$splitIndex + + # show only one edge label + em <- apply(ec, 1, function(x) max(abs(x))) + si <- net$splitIndex + for (i in unique(si)) { + tmp <- si == i + if (sum(tmp) > 1) { + w <- which(tmp) + wm <- which.max(em[w]) + edge.label[w[-wm]] <- "" + } + } + + text(ec[, 1], ec[, 2], labels = edge.label, col = col.edge.label, + cex = cex.edge.label, font = font.edge.label) + } + if (show.node.label) { + text(xx, yy, labels = node.label, col = col.node.label, + cex = cex.node.label, font = font.node.label) + } + PP <- list(Ntip = nTips, type = "networx", edge = net$edge, xx = coords[, 1], + yy = coords[, 2], x.lim=xlim, y.lim=ylim, align.tip.label=FALSE) + assign("last_plot.phylo", PP, envir = .PlotPhyloEnv) +} + + +closest.edge <- function(x, y, P1, P2) { + x1 <- P1[, 1] + x2 <- P2[, 1] + y1 <- P1[, 2] + y2 <- P2[, 2] + + A <- sqrt( (x2 - x)^2 + (y2 - y)^2) # d_BC + B <- sqrt( (x1 - x)^2 + (y1 - y)^2) # d_AC + C <- sqrt( (x1 - x2)^2 + (y1 - y2)^2) # d_AB + # Kosinussatz + alpha <- acos( (B^2 + C^2 - A^2) / (2 * B * C)) + beta <- acos( (A^2 + C^2 - B^2) / (2 * A * C)) + + d <- abs( (y2 - y1) * x - (x2 - x1) * y + x2 * y1 - y2 * x1) / + sqrt( (y2 - y1)^2 + (x2 - x1)^2) + d[alpha > (pi / 2)] <- B[alpha > (pi / 2)] + d[beta > (pi / 2)] <- A[beta > (pi / 2)] + d +} + +closest.node <- function(x, y, P) { + x1 <- P[, 1] + y1 <- P[, 2] + d <- sqrt((x1 - x)^2 + (y1 - y)^2) + d +} + + +#' Identify splits in a network +#' +#' \code{identify.networx} reads the position of the graphics pointer when the +#' mouse button is pressed. It then returns the split belonging to the edge +#' closest to the pointer. The network must be plotted beforehand. +#' +#' @param x an object of class \code{networx} +#' @param quiet a logical controlling whether to print a message inviting the +#' user to click on the tree. +#' @param \dots further arguments to be passed to or from other methods. +#' @return \code{identify.networx} returns a splits object. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link[phangorn]{plot.networx}}, +#' \code{\link[graphics]{identify}} +#' @examples +#' \dontrun{ +#' data(yeast) +#' dm <- dist.ml(yeast) +#' nnet <- neighborNet(dm) +#' plot(nnet) +#' identify(nnet) # click close to an edge +#' } +#' @importFrom graphics identify +#' @method identify networx +#' @export +identify.networx <- function(x, quiet = FALSE, ...) { + if (!quiet) + cat("Click close to a node or edge of the tree...\n") + xy <- locator(1) + if (is.null(xy)) + return(NULL) + if (is.null(x$.plot)) { + lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv) + edge <- lastPP$edge + xx <- lastPP$xx + yy <- lastPP$yy + vertices <- cbind(xx, yy) + } + else { + lastPP <- x$.plot + edge <- x$edge + vertices <- lastPP$vertices + } + P1 <- vertices[edge[, 1], , drop = FALSE] + P2 <- vertices[edge[, 2], , drop = FALSE] + d <- closest.edge(xy$x, xy$y, P1, P2) + split <- x$splitIndex[which.min(d)] + x$splits[split] +} diff --git a/R/plot_pml.R b/R/plot_pml.R index a9ae6a45..553eb864 100644 --- a/R/plot_pml.R +++ b/R/plot_pml.R @@ -1,6 +1,6 @@ #' Plot phylogeny of a pml object #' -#' \code{plot.pml} is a warapper around \code{plot.phylo} with different default +#' \code{plot.pml} is a wrapper around \code{plot.phylo} with different default #' values for unrooted, ultrametric and tip dated phylogenies. #' #' @param x an object of class \code{pml} or \code{phyDat}. @@ -11,6 +11,8 @@ #' Four values are possible: "rightwards" (the default), "leftwards", "upwards", #' and "downwards". #' @param \dots further parameters to be passed to \code{plot.phylo}. +#' @return \code{plot.pml} returns invisibly a list with arguments dexcribing the plot. +#' For further details see the \code{plot.phylo}. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{plot.phylo}}, \code{\link{axisPhylo}}, #' \code{\link{add.scale.bar}} @@ -38,8 +40,13 @@ plot.pml <- function(x, type="phylogram", direction = "rightwards", ...){ type <- match.arg(type, c("phylogram","cladogram", "fan", "unrooted", "radial", "tidy")) - plot.phylo(x$tree, type=type, direction=direction, ...) - if(is.rooted(x$tree) && (type %in% c("phylogram","cladogram"))){ + tree <- x$tree + extras <- match.call(expand.dots = FALSE)$... + cex <- ifelse(is.null(extras$cex), par("cex"), extras$cex) + cex.axis <- ifelse(is.null(extras$cex.axis), cex, extras$cex.axis) + if(!is.rooted(tree) && (type != "unrooted") ) tree <- midpoint(tree) + L <- plot.phylo(tree, type=type, direction=direction, ...) + if(is.rooted(tree) && (type %in% c("phylogram","cladogram"))){ direction <- match.arg(direction, c("rightwards", "leftwards", "upwards", "downwards")) side <- switch(direction, @@ -49,9 +56,14 @@ plot.pml <- function(x, type="phylogram", direction = "rightwards", ...){ "downwards" = 2) if(!is.null(x$tip.dates) && x$method=="tipdated"){ root_time <- max(x$tip.dates) - max(node.depth.edgelength(x$tree)) - axisPhylo(side, root.time = root_time, backward = FALSE) + axisPhylo(side, root.time = root_time, backward = FALSE, + cex.axis=cex.axis) } - else axisPhylo(side) + else if(!is.null(x$method) && x$method=="ultrametric") + axisPhylo(side, cex.axis=cex.axis) + else add.scale.bar(cex=cex) } - else add.scale.bar() + else add.scale.bar(cex=cex) + if(!is.null(x$bs)) add_support(tree, x$bs, cex=cex) + invisible(L) } diff --git a/R/pmlMix.R b/R/pmlMix.R index eeae51e1..d701b9ff 100644 --- a/R/pmlMix.R +++ b/R/pmlMix.R @@ -531,7 +531,8 @@ pmlMix <- function(formula, fit, m = 2, omega = rep(1 / m, m), pl0 <- ll[, -i, drop = FALSE] %*% omega[-i] fits[[i]] <- update(fits[[i]], llMix = pl0, wMix = omega[i]) } - if(trace >0) cat("logLik optim.pml start:", sum(weight * log(ll %*% omega)), "\n") + if(trace >0) cat("logLik optim.pml start:", + sum(weight * log(ll %*% omega)), "\n") for (i in 1:r) { pl0 <- ll[, -i, drop = FALSE] %*% omega[-i] fits[[i]] <- optim.pml(fits[[i]], optNni = MixNni, optBf = MixBf, @@ -541,7 +542,8 @@ pmlMix <- function(formula, fit, m = 2, omega = rep(1 / m, m), trace - 1), llMix = pl0, wMix = omega[i]) ll[, i] <- fits[[i]]$lv - if(trace >0) cat("logLik optim.pml", i, ": ", sum(weight * log(ll %*% omega)), "\n") + if(trace >0) cat("logLik optim.pml", i, ": ", + sum(weight * log(ll %*% omega)), "\n") # res <- optW(ll, weight, omega) # omega <- res$p if (MixRate) { diff --git a/R/pmlPart.R b/R/pmlPart.R index 3d2893e2..7f42f4b7 100644 --- a/R/pmlPart.R +++ b/R/pmlPart.R @@ -226,32 +226,10 @@ makePart <- function(fit, rooted, weight = ~index + genes) { #' @rdname pmlPart #' @export -#multiphyDat2pmlPart <- function(x, rooted = FALSE, ...) { -# shared_tree <- TRUE -# if (shared_tree) { -# concatenate_x <- do.call(cbind.phyDat, x@seq) -# dm <- dist.ml(concatenate_x) -# if (!rooted) tree <- NJ(dm) -# else tree <- upgma(dm) -# } -# else tree <- NULL -# fun <- function(x, rooted = FALSE, tree, ...) { -# if (is.null(tree)) { -# dm <- dist.ml(x) -# if (!rooted) tree <- NJ(dm) -# else tree <- upgma(dm) -# } -# pml(tree, x, ...) -# } -# fits <- lapply(x@seq, fun, tree = tree, rooted = rooted, ...) -# fits -#} -# -# multiphyDat2pmlPart <- function(x, method="unrooted", tip.dates=NULL, ...) { if(is.list(x) && all(sapply(x,inherits, "phyDat"))) seq <- x else if(inherits(x, "multiphyDat")) seq <- x@seq - else stop("x must be of class 'multiphyDat' or a list of 'phyDat' objects!") + else stop("x must be of class multiphyDat or a list of phyDat objects") shared_tree <- TRUE if (shared_tree) { concatenate_x <- do.call(cbind.phyDat, seq) diff --git a/R/pml_bb.R b/R/pml_bb.R index 4c1992c8..9e2d1a7a 100644 --- a/R/pml_bb.R +++ b/R/pml_bb.R @@ -41,10 +41,10 @@ #' @keywords cluster #' @examples #' -#' \dontrun{ #' data(woodmouse) -#' tmp <- pml_bb(woodmouse) +#' tmp <- pml_bb(woodmouse, model="HKY+I", rearrangement="NNI") #' +#' \dontrun{ #' data(Laurasiatherian) #' mt <- modelTest(Laurasiatherian) #' fit <- pml_bb(mt) @@ -89,6 +89,12 @@ pml_bb <- function(x, model=NULL, rearrangement="stochastic", if(method=="tipdated" && !is.null(attr(start, "rate"))) fit <- update(fit, rate=attr(start, "rate")) } + if(optRooted && !is.rooted(fit$tree)){ + start <- candidate_tree(fit$data, method=method, + tip.dates = tip.dates, eps=1e-7) + fit <- update(fit, tree=start) + if(method=="tipdated") fit <- update(fit, rate=attr(start, "rate")) + } type <- attr(fit$data, "type") para <- split_model(model, type) if(type=="AA" && para$optFreq){ @@ -121,7 +127,7 @@ split_model <- function(x="GTR + G(4) + I", type="DNA"){ tmp <- match(m, mods) if(all(is.na(tmp))) stop("Could not find model!") else pos <- tmp[!is.na(tmp)] - if(length(pos)>1) stop("Error, fould several models!") + if(length(pos)>1) stop("Error, found several models!") model <- mods[pos] m <- m[is.na(tmp)] diff --git a/R/pml_control.R b/R/pml_control.R index c1fada40..cb8597a0 100644 --- a/R/pml_control.R +++ b/R/pml_control.R @@ -1,7 +1,7 @@ #' Auxiliary for Controlling Fitting #' -#' Auxiliary functions for \code{\link{optim.pml}} fitting. Use it to construct -#' a \code{control} or \code{ratchet.par} argument. +#' Auxiliary functions for providing \code{\link{optim.pml}, \link{pml_bb}} +#' fitting. Use it to construct a \code{control} or \code{ratchet.par} argument. #' #' \code{pml.control} controls the fitting process. \code{epsilon} and #' \code{maxit} are only defined for the most outer loop, this affects @@ -72,7 +72,7 @@ pml.control <- function(epsilon = 1e-08, maxit = 10, trace = 1, tau = 1e-8, #' @rdname pml.control #' @export -ratchet.control <- function(iter = 20L, maxit = 200L, minit = 50L, prop = 1/2, +ratchet.control <- function(iter = 20L, maxit = 200L, minit = 100L, prop = 1/2, rell = TRUE, bs=1000L){ if (!is.numeric(maxit) || maxit <= 0) stop("maximum number of iterations must be > 0") diff --git a/R/pml_generics.R b/R/pml_generics.R index a21d472c..8af7b2b7 100644 --- a/R/pml_generics.R +++ b/R/pml_generics.R @@ -1,3 +1,4 @@ +#' @rdname pml #' @export logLik.pml <- function(object, ...) { res <- object$logLik @@ -28,7 +29,7 @@ BIC.pml <- function(object, ...) { res } - +#' @rdname pml #' @export anova.pml <- function(object, ...) { X <- c(list(object), list(...)) @@ -47,6 +48,7 @@ anova.pml <- function(object, ...) { } +#' @rdname pml #' @export vcov.pml <- function(object, ...) { FI <- score(object, FALSE)[[2]] @@ -59,7 +61,7 @@ vcov.pml <- function(object, ...) { res } - +#' @rdname pml #' @export print.pml <- function(x, ...) { model <- guess_model(x) @@ -91,6 +93,7 @@ print.pml <- function(x, ...) { row.names(rw) <- as.character(seq_len(nrow(rw))) print(rw) } + if(!is.null(x$method) && x$method == "tipdated") cat("\nRate:", x$rate, "\n") if (type == "AA") cat("Rate matrix:", x$model, "\n") if (type == "DNA") { cat("\nRate matrix:\n") @@ -119,4 +122,30 @@ print.pml <- function(x, ...) { names(bf) <- levels print(bf) #cat(bf, "\n") } + if(!isTRUE(all.equal(x$rate, 1))) cat("\nRate:", x$rate, "\n") +} + + +#' Export pml objects +#' +#' \code{write.pml} writes out the ML tree and the model parameters. +#' +#' @param x an object of class ancestral. +#' @param file a file name. File endings are added. +#' @param ... Further arguments passed to or from other methods. +#' @returns \code{write.pml} returns the input x invisibly. +#' @seealso \code{\link{ancestral.pml}}, \code{\link{plotAnc}} +#' @examples +#' data(woodmouse) +#' fit <- pml_bb(woodmouse, "JC", rearrangement = "none") +#' write.pml(fit, "woodmouse") +#' unlink(c("woodmouse_pml.txt", "woodmouse_tree.nwk")) +#' @export +write.pml <- function(x, file=tempfile(), ...){ + write.tree(x$tree, file=paste0(file, "_tree.nwk")) + if(!is.null(x$bs)) write.tree(x$bs, file=paste0(file, "_bs.nwk")) + sink(paste0(file, "_pml.txt")) + print.pml(x) + sink() + invisible(x) } diff --git a/R/read.nexus.partitions.R b/R/read.nexus.partitions.R index 6ef8c70b..b23ada8d 100644 --- a/R/read.nexus.partitions.R +++ b/R/read.nexus.partitions.R @@ -39,12 +39,14 @@ read.nexus.charset <- function(file){ #' Function to import partitioned data from nexus files #' #' \code{read.nexus.partitions} reads in sequences in NEXUS format and splits -#' the data according to the charsets givb in the SETS block. +#' the data according to the charsets given in the SETS block. #' #' @param file a file name. -#' @param return either return a list where eeach element is a 'phyDat' object +#' @param return either returns a list where each element is a 'phyDat' object #' or an object of class 'multiphyDat' #' @param \dots Further arguments passed to or from other methods. +#' @return a list where each element is a 'phyDat' object or an object of class +#' 'multiphyDat'. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} #' @seealso \code{\link{read.nexus.data}}, \code{\link{read.phyDat}} #' @keywords cluster diff --git a/R/read.nexus.splits.R b/R/read.nexus.splits.R index f5e1a4b0..9f0e9f57 100644 --- a/R/read.nexus.splits.R +++ b/R/read.nexus.splits.R @@ -64,6 +64,7 @@ read.nexus.splits <- function(file) { spEnd <- spEnd[spEnd > sp][1] dims <- grep("DIMENSION", X, ignore.case = TRUE) cyc <- grep("CYCLE", X, ignore.case = TRUE) + fcyc <- FALSE matr <- grep("MATRIX", X, ignore.case = TRUE) format <- grep("FORMAT", X, ignore.case = TRUE) start <- matr[matr > sp][1] + 1 @@ -113,13 +114,14 @@ read.nexus.splits <- function(file) { tmp <- gsub("CYCLE", "", tmp, ignore.case = TRUE) tmp <- sub("\\s+", "", tmp) cyc <- as.integer(na.omit(as.numeric(strsplit(tmp, " ")[[1]]))) + fcyc <- TRUE } attr(res, "labels") <- x if (fwei) attr(res, "weights") <- weights if (fint) attr(res, "intervals") <- intervals if (fcon) attr(res, "confidences") <- confidences if (flab) attr(res, "splitlabels") <- labels - attr(res, "cycle") <- cyc + if (fcyc) attr(res, "cycle") <- cyc class(res) <- "splits" res } diff --git a/R/read.phyDat.R b/R/read.phyDat.R index 6f2d4881..9649652f 100644 --- a/R/read.phyDat.R +++ b/R/read.phyDat.R @@ -35,7 +35,7 @@ read.fasta.user <- function (file, skip = 0, nlines = 0, #' #' \code{write.phyDat} calls the function \code{\link[ape]{write.dna}} or #' \code{\link[ape]{write.nexus.data}} and \code{read.phyDat} calls the function -#' \code{\link[ape]{read.dna}}, \code{read.aa} or \code{read.nexus.data}, so see +#' \code{\link[ape]{read.dna}} or \code{read.nexus.data}, so see #' for more details over there. #' #' You may import data directly with \code{\link[ape]{read.dna}} or @@ -57,7 +57,7 @@ read.fasta.user <- function (file, skip = 0, nlines = 0, #' \url{https://www.ncbi.nlm.nih.gov/blast/fasta.shtml} Felsenstein, J. (1993) #' Phylip (Phylogeny Inference Package) version 3.5c. Department of Genetics, #' University of Washington. -#' \url{https://evolution.genetics.washington.edu/phylip/phylip.html} +#' \url{https://phylipweb.github.io/phylip/} #' @examples #' fdir <- system.file("extdata/trees", package = "phangorn") #' primates <- read.phyDat(file.path(fdir, "primates.dna"), @@ -114,6 +114,7 @@ read.phyDat <- function(file, format="phylip", type="DNA", ...){ #' @export write.phyDat <- function(x, file, format="phylip", colsep = "", nbcol=-1, ...){ formats <- c("phylip", "nexus", "interleaved", "sequential", "fasta") + if(inherits(x, "ancestral")) x <- as.phyDat(x) format <- match.arg(tolower(format), formats) if(format=="nexus"){ type <- attr(x, "type") diff --git a/R/sankoff.R b/R/sankoff.R index ffb9e290..35031d23 100644 --- a/R/sankoff.R +++ b/R/sankoff.R @@ -46,8 +46,7 @@ fit.sankoff <- function(tree, data, cost, #' @rdname parsimony #' @export sankoff <- function(tree, data, cost = NULL, site = "pscore") { - if (!inherits(data, "phyDat")) - stop("data must be of class phyDat") + if (!inherits(data, "phyDat")) stop("data must be of class phyDat") data <- prepareDataSankoff(data) if (is.null(cost)) { levels <- attr(data, "levels") diff --git a/R/simSeq.R b/R/simSeq.R index 66a8ba0a..111d0142 100644 --- a/R/simSeq.R +++ b/R/simSeq.R @@ -122,8 +122,6 @@ simSeq.phylo <- function(x, l = 1000, Q = NULL, bf = NULL, rootseq = NULL, if (pt == "AA") levels <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V") -# c("a", "r", "n", "d", "c", "q", "e", "g", "h", "i", -# "l", "k", "m", "f", "p", "s", "t", "w", "y", "v") if (pt == "CODON") { .syn <- synonymous_subs(code=code) .sub <- tstv_subs(code=code) @@ -180,7 +178,7 @@ simSeq.phylo <- function(x, l = 1000, Q = NULL, bf = NULL, rootseq = NULL, if (pt == "DNA") return(phyDat.DNA(res, return.index = TRUE)) if (pt == "AA") return(phyDat.AA(res, return.index = TRUE)) if (pt == "USER") return(phyDat.default(res, levels = levels, - return.index = TRUE)) + return.index = TRUE, ...)) if (pt == "CODON") { res <- t(apply(res, 1, function(x) unlist(strsplit(x, "")))) return(phyDat.codon(res)) diff --git a/R/splits.R b/R/splits.R index 0cc2a51a..88d7abc9 100644 --- a/R/splits.R +++ b/R/splits.R @@ -185,7 +185,7 @@ unique.splits <- function(x, incomparables = FALSE, unrooted = TRUE, ...) { } -#' @export +#' @export distinct.splits distinct.splits <- function(...) { tmp <- c(...) res <- unique(tmp) @@ -433,7 +433,7 @@ as.splits.bitsplits <- function(x, ...){ #' @export compatible <- function(obj1, obj2 = NULL) { if (!inherits(obj1, "splits")) - stop("obj needs to be of class splits") + stop("obj must be of class splits") labels <- attr(obj1, "labels") l <- length(labels) n <- length(obj1) @@ -469,11 +469,10 @@ compatible <- function(obj1, obj2 = NULL) { # in clanistic.R ?? compatible3 <- function(x, y = NULL) { if (!inherits(x, "splits")) - stop("x needs to be of class splits") + stop("x must be of class splits") if (is.null(y)) y <- x - if (!inherits(y, "splits")) - stop("y needs to be of class splits") + stop("y must be of class splits") xlabels <- attr(x, "labels") ylabels <- attr(y, "labels") if (identical(xlabels, ylabels)) labels <- xlabels diff --git a/R/splitsNetwork.R b/R/splitsNetwork.R index b02df413..9d9eda08 100644 --- a/R/splitsNetwork.R +++ b/R/splitsNetwork.R @@ -51,7 +51,7 @@ splitsNetwork <- function(dm, splits = NULL, gamma = .1, lambda = 1e-6, if (!is.null(splits)) { tmp <- which(lengths(splits) == k) - splits <- splits[-tmp] + if(length(tmp)>0) splits <- splits[-tmp] lab <- attr(splits, "labels") dm <- dm[lab, lab] } diff --git a/R/superTree.R b/R/superTree.R index 03ffde53..95c8c14f 100644 --- a/R/superTree.R +++ b/R/superTree.R @@ -1,7 +1,7 @@ tree2phyDat <- function(trees) { # some minor error checking if (!inherits(trees, "multiPhylo")) - stop("trees must be object of class 'multiPhylo.'") + stop("trees must be of class multiPhylo") labels <- lapply(trees, function(x) sort(x$tip.label)) ulabels <- unique(labels) @@ -147,9 +147,9 @@ dist.superTree <- function(tree, trace = 0, fun, start = NULL, #' bs <- bootstrap.phyDat(Laurasiatherian, #' FUN = function(x) upgma(dist.hamming(x)), bs=50) #' -#' mrp_st <- superTree(bs) +#' mrp_st <- superTree(bs, minit = 25, maxit=50) #' plot(mrp_st) -#' \dontrun{ +#' \donttest{ #' rf_st <- superTree(bs, method = "RF") #' spr_st <- superTree(bs, method = "SPR") #' } diff --git a/R/transferBootstrap.R b/R/transferBootstrap.R index 2affd7f4..e01533bb 100644 --- a/R/transferBootstrap.R +++ b/R/transferBootstrap.R @@ -1,17 +1,19 @@ ## include in addConfidences, plotBS etc. #' Transfer Bootstrap #' -#' \code{transferBootstrap} assignes transfer bootstrap (Lemoine et al. 2018) +#' \code{transferBootstrap} assigns transfer bootstrap (Lemoine et al. 2018) #' values to the (internal) edges. #' #' @param tree The tree on which edges the bootstrap values are plotted. -#' @param BStrees a list of trees (object of class "multiPhylo"). -#' @return \code{plotBS} returns silently a tree, i.e. an object of class -#' \code{phylo} with the bootstrap values as node labels. The argument -#' \code{BSTrees} is optional and if not supplied the labels supplied -#' in the \code{node.label} slot will be used. +#' @param trees a list of trees (object of class "multiPhylo"). +#' @param phylo Logical, return a phylogentic tree with support value or a +#' vector of bootstrap values. +#' @param scale scale the values. +#' @return a phylogentic tree (a phylo object) with bootstrap values assigned to +#' the node labels. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link{plotBS}}, \code{\link{maxCladeCred}} +#' @seealso \code{\link{plotBS}}, \code{\link{maxCladeCred}}, +#' \code{\link{drawSupportOnEdges}} #' @references Lemoine, F., Entfellner, J. B. D., Wilkinson, E., Correia, D., #' Felipe, M. D., De Oliveira, T., & Gascuel, O. (2018). Renewing Felsenstein’s #' phylogenetic bootstrap in the era of big data. \emph{Nature}, @@ -29,19 +31,19 @@ #' # same as #' plotBS(raxml.tree, raxml.bootstrap, "p", "TBE") #' @export -transferBootstrap <- function(tree, BStrees){ - if(!inherits(BStrees, "multiPhylo")) - stop("BSTrees needs to be of class multiPhylo!") - BStrees <- .uncompressTipLabel(BStrees) - BStrees <- .compressTipLabel(BStrees, tree$tip.label) - BStrees <- reorder(BStrees, "postorder") +transferBootstrap <- function(tree, trees, phylo=TRUE, scale=TRUE){ + if(!inherits(trees, "multiPhylo")) + stop("trees must be of class multiPhylo") + trees <- .uncompressTipLabel(trees) + trees <- .compressTipLabel(trees, tree$tip.label) + trees <- reorder(trees, "postorder") l <- Ntip(tree) bp <- prop.part(tree) bp <- SHORTwise(bp)[-1] not_cherry <- lengths(bp) != 2 res <- numeric(length(bp)) - for(i in seq_along(BStrees)){ - tmp <- BStrees[[i]] + for(i in seq_along(trees)){ + tmp <- trees[[i]] bptmp <- prop.part(tmp) bptmp <- SHORTwise(bptmp)[-1] ind <- fmatch(bp, bptmp) @@ -50,8 +52,11 @@ transferBootstrap <- function(tree, BStrees){ ind <- which(is.na(ind) & not_cherry) for(j in ind) res[j] <- res[j] + Transfer_Index(bp[[j]], tmp$edge, l) } - res <- res / length(BStrees) * 100 - tree$node.label <- c(NA_real_, res) + res <- res / length(trees) + if(! scale) res <- res * 100 + res <- c(NA_real_, res) + if(!phylo) return(res) + tree$node.label <- res tree } diff --git a/R/treeManipulation.R b/R/treeManipulation.R index 8a288b5d..fd0e4837 100644 --- a/R/treeManipulation.R +++ b/R/treeManipulation.R @@ -68,13 +68,16 @@ changeEdgeLength <- function(tree, edge, edge.length) { #' #' \code{midpoint} performs midpoint rooting of a tree. \code{pruneTree} #' produces a consensus tree. -#' #' \code{pruneTree} prunes back a tree and produces a consensus tree, for trees #' already containing nodelabels. It assumes that nodelabels are numerical or #' character that allows conversion to numerical, it uses -#' as.numeric(as.character(tree$node.labels)) to convert them. \code{midpoint} -#' so far does not transform node.labels properly. -#' +#' as.numeric(as.character(tree$node.labels)) to convert them. +#' \code{midpoint} by default assumes that node labels contain support values. +#' This works if support values are computed from splits, but should be +#' recomputed for clades. +#' \code{keep_as_tip} takes a list of tips and/or node labels and returns a tree +#' pruned to those. If node label, then it prunes all descendants of that node +#' until that internal node becomes a tip. #' @param tree an object of class \code{phylo}. #' @param FUN a function evaluated on the nodelabels, result must be logical. @@ -509,8 +512,8 @@ add.tips <- function(tree, tips, where, edge.length = NULL) { #' Compute all trees topologies. #' -#' \code{allTrees} computes all tree topologies for rooted or unrooted trees -#' with up to 10 tips. \code{allTrees} returns bifurcating trees. +#' \code{allTrees} computes all bifurcating tree topologies for rooted or unrooted +#' trees with up to 10 tips. The number of trees grows fast #' #' #' @param n Number of tips (<=10). @@ -518,7 +521,8 @@ add.tips <- function(tree, tips, where, edge.length = NULL) { #' @param tip.label Tip labels. #' @return an object of class \code{multiPhylo}. #' @author Klaus Schliep \email{klaus.schliep@@gmail.com} -#' @seealso \code{\link[ape]{rtree}}, \code{\link{nni}} +#' @seealso \code{\link[ape]{rtree}}, \code{\link{nni}}, +#' \code{\link[ape]{howmanytrees}}, \code{\link{dfactorial}} #' @keywords cluster #' @examples #' @@ -533,6 +537,7 @@ add.tips <- function(tree, tips, where, edge.length = NULL) { allTrees <- function(n, rooted = FALSE, tip.label = NULL) { n <- as.integer(n) nt <- as.integer(round(dfactorial(2 * (n + rooted) - 5))) + Nnode <- as.integer(n - 2L + rooted) if ( (n + rooted) > 10) { stop(gettextf("That would generate %d trees, and take up more than %d MB of memory!", nt, as.integer(round(nt / 1000)), domain = "R-phangorn")) @@ -603,7 +608,7 @@ allTrees <- function(n, rooted = FALSE, tip.label = NULL) { edge <- edges[[x]] edge <- edge[reorderRcpp(edge, n, n + 1L, 2L), ] tree <- list(edge = edge) - tree$Nnode <- as.integer(n - 2L + rooted) + tree$Nnode <- Nnode attr(tree, "order") <- "postorder" class(tree) <- "phylo" trees[[x]] <- tree @@ -632,7 +637,16 @@ allAncestors <- function(x) { res } - +char2pos <- function(x, node){ + if(is.null(x$node.label)){ + tmp <- as.character(seq(Ntip(x)+1, Ntip(x)+Nnode(x))) + labels <- c(x$tip.label, tmp) + } + else labels <- c(x$tip.label, x$node.label) + x <- match(node, labels) + if(any(is.na(x))) stop("Can't find supplied node in the labels") + x +} ## @aliases Ancestors Children Descendants Siblings mrca.phylo #' tree utility function @@ -649,7 +663,8 @@ allAncestors <- function(x) { #' If the argument node is missing the function is evaluated for all nodes. #' #' @param x a tree (a phylo object). -#' @param node an integer or a vector of integers corresponding to a node ID +#' @param node an integer or character vector (or scalar) corresponding to a +#' node ID #' @param type specify whether to return just direct children / parents or all #' @param include.self whether to include self in list of siblings #' @param full a logical indicating whether to return the MRCAs among all tips @@ -678,6 +693,7 @@ allAncestors <- function(x) { #' @export #' @rdname Ancestors Ancestors <- function(x, node, type = c("all", "parent")) { + if(!missing(node) && inherits(node, "character")) node <- char2pos(x, node) parents <- x$edge[, 1] child <- x$edge[, 2] pvector <- integer(max(x$edge)) # parents @@ -730,6 +746,7 @@ allDescendants <- function(x) { #' @export Children <- function(x, node) { # return allChildren if node is missing + if(!missing(node) && inherits(node, "character")) node <- char2pos(x, node) if (!missing(node) && length(node) == 1) return(x$edge[x$edge[, 1] == node, 2]) allChildren(x)[node] @@ -740,6 +757,7 @@ Children <- function(x, node) { #' @export Descendants <- function(x, node, type = c("tips", "children", "all")) { type <- match.arg(type) + if(!missing(node) && inherits(node, "character")) node <- char2pos(x, node) if (type == "children") return(Children(x, node)) if (type == "tips") return(bip(x)[node]) # new version using Rcpp @@ -766,6 +784,7 @@ Descendants <- function(x, node, type = c("tips", "children", "all")) { #' @export Siblings <- function(x, node, include.self = FALSE) { if (missing(node)) node <- as.integer(1:max(x$edge)) + if(!missing(node) && inherits(node, "character")) node <- char2pos(x, node) l <- length(node) if (l == 1) { v <- Children(x, Ancestors(x, node, "parent")) @@ -799,6 +818,7 @@ Siblings <- function(x, node, include.self = FALSE) { #' @export mrca.phylo <- function(x, node = NULL, full = FALSE) { if (is.null(node)) return(mrca2(x, full = full)) + if(!missing(node) && inherits(node, "character")) node <- char2pos(x, node) return(getMRCA(x, node)) } @@ -843,3 +863,14 @@ relabel <- function(y, ref) { y$tip.label <- ref y } + +#' @rdname midpoint +#' @param labels tip and node labels to keep as tip labels in the tree +#' @export +keep_as_tip<- function(tree, labels){ + nodes_to_keep <- labels[!is.na(match(labels, tree$node.label))] + tips_to_remove <- Descendants(tree, nodes_to_keep) |> unlist() |> unique() + tree_1 <- drop.tip(tree, tips_to_remove, subtree = TRUE) + tree_2 <- keep.tip(tree_1, labels, collapse.singles=FALSE) + tree_2 +} diff --git a/R/treeRearrangement.R b/R/treeRearrangement.R index 5f089338..b783dd2d 100644 --- a/R/treeRearrangement.R +++ b/R/treeRearrangement.R @@ -212,7 +212,7 @@ kSPR <- function(tree, k = NULL) { oneOf4 <- function(tree, ind1, ind2, from = 1, to = 1, root) { if (!is.binary(tree)) - stop("Sorry, trees must be binary!") + stop("trees must be binary") tree <- reroot(tree, ind2, FALSE) kids1 <- Children(tree, ind1) anc <- Ancestors(tree, ind1, "all") diff --git a/R/treedist.R b/R/treedist.R index 341bd335..0adc5e58 100644 --- a/R/treedist.R +++ b/R/treedist.R @@ -49,6 +49,9 @@ cophenetic.splits <- function(x) { #' @seealso \code{\link[stats]{cophenetic}} for the generic function, #' \code{neighborNet} to construct a network from a distance matrix #' @keywords manip +#' @examples +#' example(neighborNet) +#' cophenetic(nnet) #' @export cophenetic.networx <- function(x) { spl <- x$splits @@ -56,6 +59,22 @@ cophenetic.networx <- function(x) { } +# helper functions +fun1 <- function(x) { + w <- numeric(max(x$edge)) + w[x$edge[, 2]] <- x$edge.length + w +} + + +fun2 <- function(x, rooted=FALSE) { + bp <- bip(x) + if(!rooted) bp <- SHORTwise(bp) + bp <- sapply(bp, paste, collapse = "_") + bp +} + + ## @aliases treedist RF.dist wRF.dist KF.dist path.dist sprdist SPR.dist #' Distances between trees #' @@ -156,7 +175,8 @@ treedist <- function(tree1, tree2, check.labels = TRUE) { branch.score.difference <- NULL path.difference <- NULL quadratic.path.difference <- NULL - if (!is.binary(tree1) | !is.binary(tree2)) message("Trees are not binary!") + if (!is.binary(tree1) | !is.binary(tree2)) + message("Some trees are not binary. Result may not what you expect!") bp1 <- bip(tree1) bp2 <- bip(tree2) @@ -232,7 +252,8 @@ sprdist <- function(tree1, tree2) { # side of splits) tree1 <- reorder(tree1, "postorder") tree2 <- reorder(tree2, "postorder") - if (!is.binary(tree1) | !is.binary(tree2)) message("Trees are not binary!") + if (!is.binary(tree1) | !is.binary(tree2)) + message("Some trees are not binary. Result may not what you expect!") # possibly replace bip with bipart bp1 <- bip(tree1) bp1 <- SHORTwise(bp1) @@ -343,6 +364,7 @@ wRF0 <- function(tree1, tree2, normalize = FALSE, check.labels = TRUE, r2 <- is.rooted(tree2) if (r1 != r2) { message("one tree is unrooted, unrooted both") + rooted <- FALSE } if (!rooted) { if (r1) @@ -350,25 +372,14 @@ wRF0 <- function(tree1, tree2, normalize = FALSE, check.labels = TRUE, if (r2) tree2 <- unroot(tree2) } - if (!r1 | !r2) { - if (r1) - tree1 <- unroot(tree1) - if (r2) - tree2 <- unroot(tree2) - } if (!is.binary(tree1) | !is.binary(tree2)) - message("Trees are not binary!") + message("Some trees are not binary. Result may not what you expect!") if (check.labels) tree2 <- checkLabels(tree2, tree1$tip.label) if (has.singles(tree1)) tree1 <- collapse.singles(tree1) if (has.singles(tree2)) tree2 <- collapse.singles(tree2) - bp1 <- bip(tree1) - bp2 <- bip(tree2) - if (!rooted) { - bp1 <- SHORTwise(bp1) - bp2 <- SHORTwise(bp2) - } - bp1 <- sapply(bp1, paste, collapse = "_") - bp2 <- sapply(bp2, paste, collapse = "_") + + bp1 <- fun2(tree1, rooted) + bp2 <- fun2(tree2, rooted) w1 <- numeric(max(tree1$edge)) w2 <- numeric(max(tree2$edge)) @@ -398,7 +409,7 @@ wRF2 <- function(tree, trees, normalize = FALSE, check.labels = TRUE, trees <- .uncompressTipLabel(trees) if (rooted && any(!is.rooted(trees))) { - warning("some trees were rooted, unrooted all") + message("some trees were rooted, unrooted all") rooted <- FALSE } @@ -414,34 +425,13 @@ wRF2 <- function(tree, trees, normalize = FALSE, check.labels = TRUE, unclass(trees) nTips <- length(tree$tip.label) - - fun1 <- function(x) { - w <- numeric(max(x$edge)) - w[x$edge[, 2]] <- x$edge.length - w - } W <- lapply(trees, fun1) - fun2 <- function(x, nTips) { - bp <- bip(x) - bp <- SHORTwise(bp) - bp <- sapply(bp, paste, collapse = "_") - bp - } - fun3 <- function(x, nTips) { - bp <- bip(x) - bp <- sapply(bp, paste, collapse = "_") - bp - } - if (rooted) BP <- lapply(trees, fun3, nTips) - else BP <- lapply(trees, fun2, nTips) + BP <- lapply(trees, fun2, rooted) if (!rooted && is.rooted(tree)) tree <- unroot(tree) - bp <- bip(tree) - - if (!rooted) bp <- SHORTwise(bp) - bp <- sapply(bp, paste, collapse = "_") + bp <- fun2(tree, rooted) w <- numeric(max(tree$edge)) w[tree$edge[, 2]] <- tree$edge.length @@ -473,7 +463,7 @@ wRF1 <- function(trees, normalize = FALSE, check.labels = TRUE, trees <- .uncompressTipLabel(trees) if (rooted && any(!is.rooted(trees))) { - warning("some trees were rooted, unrooted all") + message("some trees were rooted, unrooted all") rooted <- FALSE } if (!rooted) { @@ -485,26 +475,12 @@ wRF1 <- function(trees, normalize = FALSE, check.labels = TRUE, unclass(trees) nTips <- length(trees[[1]]$tip.label) - fun1 <- function(x) { - w <- numeric(max(x$edge)) - w[x$edge[, 2]] <- x$edge.length - w - } + W <- lapply(trees, fun1) - fun2 <- function(x, nTips) { - bp <- bip(x) - bp <- SHORTwise(bp) - bp <- sapply(bp, paste, collapse = "_") - bp - } - fun3 <- function(x, nTips) { - bp <- bip(x) - bp <- sapply(bp, paste, collapse = "_") - bp - } + if (normalize) sc <- sapply(trees, function(x) sum(x$edge.length)) - if (rooted) BP <- lapply(trees, fun3, nTips) - else BP <- lapply(trees, fun2, nTips) + + BP <- lapply(trees, fun2, rooted) k <- 1 l <- length(trees) wRF <- numeric( (l * (l - 1)) / 2) @@ -534,21 +510,9 @@ wRF1 <- function(trees, normalize = FALSE, check.labels = TRUE, mRF2 <- function(tree, trees, normalize = FALSE, check.labels = TRUE, rooted = FALSE) { - if (!inherits(trees, "multiPhylo")) - stop("Argument trees should be an object of class \"multiPhylo\"") - if (!inherits(tree, "phylo")) - stop("Argument tree should be an object of class \"phylo\"") trees <- .compressTipLabel(trees) tipLabel <- attr(trees, "TipLabel") if (check.labels) tree <- checkLabels(tree, tipLabel) -# if (check.labels) { -# ind <- match(tipLabel, tree$tip.label) -# if (any(is.na(ind)) | length(tipLabel) != length(tree$tip.label)) -# stop("trees have different labels") -# tree$tip.label <- tree$tip.label[ind] -# ind2 <- match(seq_along(ind), tree$edge[, 2]) -# tree$edge[ind2, 2] <- order(ind) -# } nTips <- length(tipLabel) l <- length(trees) RF <- numeric(l) @@ -557,7 +521,7 @@ mRF2 <- function(tree, trees, normalize = FALSE, check.labels = TRUE, if (has.singles(tree)) tree <- collapse.singles(tree) if (!rooted && any(is.rooted(trees))) { - warning("some trees were rooted, unrooted all") + message("some trees were rooted, unrooted all") trees <- unroot(trees) } if (!rooted && is.rooted(tree)) tree <- unroot(tree) @@ -592,8 +556,6 @@ mRF2 <- function(tree, trees, normalize = FALSE, check.labels = TRUE, mRF <- function(trees, normalize = FALSE, rooted = FALSE) { - if (!inherits(trees, "multiPhylo")) - stop("Argument trees should be an object of class \"multiPhylo\"") trees <- .compressTipLabel(trees) tipLabel <- attr(trees, "TipLabel") nTips <- length(tipLabel) @@ -601,7 +563,7 @@ mRF <- function(trees, normalize = FALSE, rooted = FALSE) { RF <- numeric( (l * (l - 1)) / 2) if (rooted && any(!is.rooted(trees))) { - warning("Some trees were rooted, unrooted all") + message("some trees were rooted, unrooted all") rooted <- FALSE } if (!rooted) { @@ -666,7 +628,8 @@ RF0 <- function(tree1, tree2 = NULL, normalize = FALSE, check.labels = TRUE, } } if (check.labels) tree2 <- checkLabels(tree2, tree1$tip.label) - if (!is.binary(tree1) | !is.binary(tree2)) message("Trees are not binary!") + if (!is.binary(tree1) | !is.binary(tree2)) + message("Some trees are not binary. Result may not what you expect!") bp1 <- bipart(tree1) bp2 <- bipart(tree2) nTips <- length(tree1$tip.label) @@ -732,15 +695,8 @@ kf0 <- function(tree1, tree2, check.labels = TRUE, rooted = FALSE) { } } - bp1 <- bip(tree1) - bp2 <- bip(tree2) - - if (!rooted) { - bp1 <- SHORTwise(bp1) - bp2 <- SHORTwise(bp2) - } - bp1 <- sapply(bp1, paste, collapse = "_") - bp2 <- sapply(bp2, paste, collapse = "_") + bp1 <- fun2(tree1, rooted) + bp2 <- fun2(tree2, rooted) w1 <- numeric(max(tree1$edge)) w2 <- numeric(max(tree2$edge)) @@ -769,7 +725,7 @@ kf1 <- function(tree, trees, check.labels = TRUE, rooted = FALSE) { if (has.singles(tree)) tree <- collapse.singles(tree) if (rooted && any(!is.rooted(trees))) { - warning("some trees were rooted, unrooted all") + message("some trees were rooted, unrooted all") rooted <- FALSE } if (!rooted) { @@ -782,31 +738,13 @@ kf1 <- function(tree, trees, check.labels = TRUE, rooted = FALSE) { nTips <- length(tree$tip.label) - fun1 <- function(x) { - w <- numeric(max(x$edge)) - w[x$edge[, 2]] <- x$edge.length - w - } W <- lapply(trees, fun1) - fun2 <- function(x, nTips) { - bp <- bip(x) - bp <- SHORTwise(bp) - bp <- sapply(bp, paste, collapse = "_") - bp - } - fun3 <- function(x, nTips) { - bp <- bip(x) - bp <- sapply(bp, paste, collapse = "_") - bp - } - if (rooted) BP <- lapply(trees, fun3, nTips) - else BP <- lapply(trees, fun2, nTips) + BP <- lapply(trees, fun2, rooted) if (!rooted && is.rooted(tree)) tree <- unroot(tree) - bp <- bip(tree) - if (!rooted) bp <- SHORTwise(bp) - bp <- sapply(bp, paste, collapse = "_") + + bp <- fun2(tree, rooted) w <- numeric(max(tree$edge)) w[tree$edge[, 2]] <- tree$edge.length @@ -835,7 +773,7 @@ kf2 <- function(trees, check.labels = TRUE, rooted = FALSE) { nTips <- length(trees[[1]]$tip.label) if (rooted && any(!is.rooted(trees))) { - warning("some trees were rooted, unrooted all") + message("some trees were rooted, unrooted all") rooted <- FALSE } if (!rooted && any(is.rooted(trees))) { @@ -843,27 +781,9 @@ kf2 <- function(trees, check.labels = TRUE, rooted = FALSE) { } unclass(trees) - fun1 <- function(x) { - w <- numeric(max(x$edge)) - w[x$edge[, 2]] <- x$edge.length - w - } W <- lapply(trees, fun1) - - fun2 <- function(x, nTips) { - bp <- bip(x) - bp <- SHORTwise(bp) - bp <- sapply(bp, paste, collapse = "_") - bp - } - fun3 <- function(x, nTips) { - bp <- bip(x) - bp <- sapply(bp, paste, collapse = "_") - bp - } - if (rooted) BP <- lapply(trees, fun3, nTips) - else BP <- lapply(trees, fun2, nTips) + BP <- lapply(trees, fun2, rooted) k <- 1 l <- length(trees) diff --git a/R/upgma.R b/R/upgma.R new file mode 100644 index 00000000..7ac5275d --- /dev/null +++ b/R/upgma.R @@ -0,0 +1,105 @@ +#' UPGMA, WPGMA and sUPGMA +#' +#' UPGMA and WPGMA clustering. UPGMA and WPGMA are a wrapper function around +#' \code{\link[stats]{hclust}} returning a \code{phylo} object. +## UPGMA additionally performs nearest neighbor interchange (NNI) tree rearrangements +## to improve the phylogeny (Schliep et al. 2024). +#' \code{supgma} perform serial sampled UPGMA similar to Drummond and Rodrigo +#' (2000). +## and also performing NNI rearrangements. +#' +#' @param D A distance matrix. +#' @param method The agglomeration method to be used. This should be (an +#' unambiguous abbreviation of) one of "ward", "single", "complete", "average", +#' "mcquitty", "median" or "centroid". The default is "average". +## @param NNI logical whether make nearest neighbor rearrangements to improve the +## tree. Currently only available for \code{method="average"}. +#' @param trace Show output during optimization (see details). +#' @param tip.dates A named vector of sampling times associated to the tips. +#' @param \dots Further arguments passed to or from other methods. +#' @return A phylogenetic tree of class \code{phylo}. +#' @author Klaus Schliep \email{klaus.schliep@@gmail.com} +#' @seealso \code{\link{hclust}}, \code{\link{dist.hamming}}, \code{\link{NJ}}, +#' \code{\link{as.phylo}}, \code{\link{fastme}}, \code{\link{nnls.tree}}, +#' \code{\link{rtt}} +#' @references Sneath, P. H., & Sokal, R. R. (1973). \emph{Numerical taxonomy. +#' The principles and practice of numerical classification.} +#' +#' Sokal, R. R., & Michener, C. D. (1958). A statistical method for evaluating +#' systematic relationships. \emph{University of Kansas Scientific Bulletin}, +#' v. 38. +#' +#' Drummond, A., & Rodrigo, A. G. (2000). Reconstructing genealogies of serial +#' samples under the assumption of a molecular clock using serial-sample UPGMA. +#' \emph{Molecular Biology and Evolution}, \bold{17(12)}, 1807-1815. +#' @keywords cluster +#' @examples +#' +#' data(Laurasiatherian) +#' dm <- dist.ml(Laurasiatherian) +#' tree <- upgma(dm) +#' plot(tree) +#' +#' @rdname upgma +#' @export +upgma <- function(D, method = "average", ...) { + method <- match.arg(method, c("average", "ward.D", "ward.D2", "single", + "complete", "average", "mcquitty", "median", "centroid")) + DD <- as.dist(D) + hc <- hclust(DD, method = method) + result <- as.phylo(hc) +# if(NNI && method=="average"){ +# result <- upgma_nni(DD, tree=result, ...) +# } + result <- reorder(result, "postorder") + result +} + + +#' @rdname upgma +#' @export +wpgma <- function(D, method = "mcquitty", ...) { + method <- match.arg(method, c("average", "ward.D", "ward.D2", "single", + "complete", "average", "mcquitty", "median", "centroid")) + DD <- as.dist(D) + hc <- hclust(DD, method = method, ...) + result <- as.phylo(hc) + result <- reorder(result, "postorder") + result +} + + +#' @rdname upgma +#' @export +supgma <- function(D, tip.dates, trace=0){ + tree <- fastme.ols(D) + tree <- checkLabels(tree, attr(D, "Labels")) + tree <- rtt(tree, tip.dates[tree$tip.label]) + tree <- nnls.tree(D, tree, method = "tipdated", + tip.dates=tip.dates[tree$tip.label]) + rate_0 <- attr(tree, "rate") + dm_td <- designTipDated(tree, tip.dates[tree$tip.label]) + dm_td <- dm_td[, ncol(dm_td)] + me_start <- sum(tree$edge.length* rate_0) + if(trace) cat("ME: ", me_start, "rate:", rate_0, "\n") + me_best <- me_start + me <- 0 + iter <- TRUE + swapi <- 1 + while(iter){ + D_tmp <- D - rate_0 * dm_td + tree_tmp <- upgma(D_tmp) + swapi <- attr(tree_tmp, "swap") + tree_tmp <- nnls.tree(D, tree_tmp, method = "tipdated", + tip.dates=tip.dates[tree$tip.label]) + rate_0 <- attr(tree_tmp, "rate") + me <- sum(tree_tmp$edge.length * rate_0) + if(trace) cat("ME: ", me, "rate:", rate_0, "\n") + if(me < me_best){ + tree <- tree_tmp + me_best <- me + } else iter <- FALSE + } + tree$tip.dates <- tip.dates[tree$tip.label] + tree +} diff --git a/R/zzz.R b/R/zzz.R index a37d8e3b..fc911cf3 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -43,7 +43,6 @@ .nucleotideAlphabet <- c("a", "c", "g", "t") - # if rate g[i] is smaller than .gEps invariant site is increased by w[i] .gEps <- 1e-12 @@ -55,3 +54,4 @@ loadModule("Fitch_mod", TRUE) # .onLoad <- function(libname, pkgname) { # library.dynam("phangorn", pkgname, libname) #} + diff --git a/README.md b/README.md index 1082b754..5e2c8fa4 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![CRAN Status Badge](https://www.r-pkg.org/badges/version/phangorn)](https://cran.r-project.org/package=phangorn) [![CRAN Downloads (monthly)](https://cranlogs.r-pkg.org/badges/phangorn)](https://cran.r-project.org/package=phangorn) [![CRAN Downloads (total)](https://cranlogs.r-pkg.org/badges/grand-total/phangorn)](https://cran.r-project.org/package=phangorn) -[![codecov.io](https://codecov.io/github/KlausVigo/phangorn/coverage.svg?branch=master)](https://codecov.io/github/KlausVigo/phangorn?branch=master) +[![Codecov test coverage](https://codecov.io/gh/KlausVigo/phangorn/branch/master/graph/badge.svg)](https://app.codecov.io/gh/KlausVigo/phangorn?branch=master) # phangorn @@ -12,15 +12,18 @@ phangorn is a package for phylogenetic reconstruction and analysis in the R lang You can install - the latest released version `install.packages("phangorn")` - the latest development version `remotes::install_github("KlausVigo/phangorn")` +- [r-universe](https://r-universe.dev/) kindly provides binaries for Windows, +Linux and OS X of the development version [here](https://klausvigo.r-universe.dev/phangorn#). -To install the development version you may need to install the Biostrings and seqLogo package from bioconductor first: +To install the development version you may need to install the Biostrings package from bioconductor first: ``` if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") -BiocManager::install(c("Biostrings", "seqLogo")) +BiocManager::install("Biostrings") ``` Also the development version usually depends on the latest ape development -version and information to download can be found [here](http://ape-package.ird.fr/ape_installation.html). +version and information to download can be found +[here](https://emmanuelparadis.github.io/ape_installation.html). Additionally you may need to install on windows [Rtools](https://cran.r-project.org/bin/windows/Rtools/) and on mac [XCode](https://developer.apple.com/xcode/) and [GFortran](https://gcc.gnu.org/wiki/GFortranBinaries). diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000..04c55859 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,14 @@ +comment: false + +coverage: + status: + project: + default: + target: auto + threshold: 1% + informational: true + patch: + default: + target: auto + threshold: 1% + informational: true diff --git a/inst/tinytest/test_ancestral.R b/inst/tinytest/test_ancestral.R index 3d54aeff..bd01eb41 100644 --- a/inst/tinytest/test_ancestral.R +++ b/inst/tinytest/test_ancestral.R @@ -15,15 +15,32 @@ fit <- pml(tree, dna) # dna tests differs from other data types as it may returns ambiguous data # test ancestral generics # test.ml1 <- ancestral.pml(fit, type = "ml") -test_ml <- ancestral.pml(fit, type = "ml", return = "phyDat") -test_mpr <- ancestral.pars(tree, dna, "MPR", return = "phyDat") -test_acctran <- ancestral.pars(tree, dna, "ACCTRAN", return = "phyDat") +test_ml <- ancestral.pml(fit, type = "ml") +test_mpr <- ancestral.pars(tree, dna, "MPR") +test_acctran <- ancestral.pars(tree, dna, "ACCTRAN") -expect_equal(as.character(test_ml), as.character(test_acctran)) -expect_equal(as.character(test_ml), as.character(test_mpr)) +#expect_equal(as.character(test_ml), as.character(test_acctran)) +#expect_equal(as.character(test_ml), as.character(test_mpr)) -test_mpr_2 <- ancestral.pars(tree2, dna, "MPR", return = "phyDat") -test_acctran_2 <- ancestral.pars(tree2, dna, "ACCTRAN", return = "phyDat") +#test_mpr_2 <- ancestral.pars(tree2, dna, "MPR") +#test_acctran_2 <- ancestral.pars(tree2, dna, "ACCTRAN") -expect_equal(test_mpr_2[,1], test_acctran_2[,1], check.attributes = FALSE) +#expect_equal(test_mpr_2[,1], test_acctran_2[,1], check.attributes = FALSE) + +data(Laurasiatherian) +fit <- pml_bb(Laurasiatherian[,1:100], "JC", rearrangement = "none") +anc_ml <- ancestral.pml(fit) +write.ancestral(anc_ml) +align <- read.phyDat("ancestral_align.fasta") +tree <- read.tree("ancestral_tree.nwk") +df <- read.table("ancestral.state", header=TRUE) +anc_ml_disc <- ancestral(tree, align, df) +expect_equal(anc_ml[[1]], anc_ml_disc[[1]]) +expect_equal(anc_ml[[2]], anc_ml_disc[[2]]) +expect_equal(anc_ml[[3]], anc_ml_disc[[3]]) +expect_equal(anc_ml[[4]], anc_ml_disc[[4]]) +unlink(c("ancestral_align.fasta", "ancestral_tree.nwk", "ancestral.state")) + + +## Felsenstein example diff --git a/inst/tinytest/test_bootstrap.R b/inst/tinytest/test_bootstrap.R index 3959fb6d..1f639222 100644 --- a/inst/tinytest/test_bootstrap.R +++ b/inst/tinytest/test_bootstrap.R @@ -22,8 +22,9 @@ expect_true(is.null(attr(nnet$splits, "confidences"))) expect_false(is.null(attr(nnet2$splits, "confidences"))) dat <- Laurasiatherian[sample(47, 10)] |> as.character() |> phyDat() -fit <- pml_bb(dat, "JC", rearrangement="NNI") -bs_trees_pml <- bootstrap.pml(fit, bs=10, rearrangement="NNI") +fit <- pml_bb(dat, "JC", rearrangement="NNI", control=pml.control(trace=0)) +bs_trees_pml <- bootstrap.pml(fit, bs=10, rearrangement="NNI", + control=pml.control(trace=0)) expect_inherits(bs_trees_pml, "multiPhylo") diff --git a/inst/tinytest/test_dist_tree.R b/inst/tinytest/test_dist_tree.R index a18e67fd..e99cf1f9 100644 --- a/inst/tinytest/test_dist_tree.R +++ b/inst/tinytest/test_dist_tree.R @@ -2,17 +2,25 @@ data(Laurasiatherian) dm <- dist.ml(Laurasiatherian) # check nnls functions - tree_nj <- NJ(dm) - tree_unj <- UNJ(dm) - tree_nnls_unj <- nnls.phylo(tree_unj, dm) - tree_nnls_nj <- nnls.phylo(tree_nj, dm) +tree_nj <- NJ(dm) +tree_unj <- UNJ(dm) +tree_nnls_unj <- nnls.phylo(tree_unj, dm) +tree_nnls_nj <- nnls.phylo(tree_nj, dm) - tree_upgma <- upgma(dm) - tree_wpgma <- wpgma(dm) - tree_nnls_upgma <- nnls.phylo(tree_upgma, dm, method="ultrametric") - tree_nnls_wpgma <- nnls.phylo(tree_wpgma, dm, method="ultrametric") +tree_upgma <- upgma(dm) +tree_wpgma <- wpgma(dm) +tree_nnls_upgma <- nnls.phylo(tree_upgma, dm, method="ultrametric") +tree_nnls_wpgma <- nnls.phylo(tree_wpgma, dm, method="ultrametric") + +expect_equal(tree_upgma, tree_nnls_upgma) +expect_false(all.equal(tree_wpgma, tree_nnls_wpgma)) +expect_equal(tree_unj, tree_nnls_unj) +expect_false(all.equal(tree_nj, tree_nnls_nj)) + +# Test NNI +# tree_upgma_no_nni <- upgma(dm, NNI=FALSE) +# tree_upgma_nni <- upgma(dm, NNI=TRUE) + +# expect_true(is.ultrametric(tree_upgma_nni)) +# expect_true(sum(tree_upgma_no_nni$edge.length)>=sum(tree_upgma_nni$edge.length)) - expect_equal(tree_upgma, tree_nnls_upgma) - expect_false(all.equal(tree_wpgma, tree_nnls_wpgma)) -# expect_equal(tree_unj, tree_nnls_unj) - expect_false(all.equal(tree_nj, tree_nnls_nj)) diff --git a/inst/tinytest/test_modelTest.R b/inst/tinytest/test_modelTest.R index d14a6380..ca6b8ef6 100644 --- a/inst/tinytest/test_modelTest.R +++ b/inst/tinytest/test_modelTest.R @@ -38,9 +38,12 @@ expect_equal(MT_AA_all$Model[which.min(MT_AA_all$BIC)], "WAG") # test user defined states tree <- rcoal(10) +tree$edge.length <- 2 * (tree$edge.length / sum(tree$edge.length)) +tree$edge.length[tree$edge[,2]<11] <- tree$edge.length[tree$edge[,2]<11] + .1 + Z <- simSeq(tree, Q = c(1,0,0,1,0,1), type = "USER", levels=c("A", "B", "C", "D")) -MT_USER <- modelTest(Z, I=TRUE, G=TRUE, +MT_USER <- modelTest(Z, tree=tree, I=TRUE, G=TRUE, control = pml.control(epsilon = 1e-08, maxit = 5, trace = 0), multicore = TRUE, mc.cores = 2L) expect_equal(MT_USER$Model[which.min(MT_USER$BIC)], "ORDERED") diff --git a/inst/tinytest/test_parsimony.R b/inst/tinytest/test_parsimony.R index 42847207..68f76378 100644 --- a/inst/tinytest/test_parsimony.R +++ b/inst/tinytest/test_parsimony.R @@ -72,7 +72,6 @@ tree2 <- acctran(tree2, dat) expect_equal(sum(tree2$edge.length), fitch(tree2,dat)) - # test random.addition ra_tree <- random.addition(yeast) ratchet_tree <- pratchet(yeast, start=ra_tree, trace=0) @@ -80,3 +79,34 @@ expect_true(attr(ra_tree, "pscore") >= attr(ratchet_tree, "pscore")) trivial_tree <- pratchet(dat, trace=0, all=FALSE, minit = 10, maxit = 20) expect_true(inherits(trivial_tree, "phylo")) + +# test CI / RI +data(carnivora) +frm <- ~SuperFamily/Family/Genus/Species +tr <- as.phylo(frm, data = carnivora, collapse=FALSE) +tr$edge.length <- rep(1, nrow(tr$edge)) + +X <- matrix(0, 112, 7, dimnames = list(tr$tip.label, c("Canidae", "Felidae", + "Ursidae", "Canidae_Felidae", "Canidae_Ursidae", "Felidae_Ursidae", + "Canidae_Felidae_Ursidae"))) +desc_canidae <- Descendants(tr, "Canidae")[[1]] +desc_felidae <- Descendants(tr, "Felidae")[[1]] +desc_ursidae <- Descendants(tr, "Ursidae")[[1]] + +X[desc_canidae, c(1,4,5,7)] <- 1 +X[desc_felidae, c(2,4,6,7)] <- 1 +X[desc_ursidae, c(3,5,6,7)] <- 1 + +X <- phyDat(X, "USER", levels=c(0,1)) + +#col <- rep("black", 112) +#col[desc_felidae] <- "red" +#col[desc_canidae] <- "blue" +#col[desc_ursidae] <- "green" +#plot(tr, "f", tip.color=col, show.node=TRUE) + +ci1 <- CI(tr, X, sitewise = TRUE) +ri1 <- RI(tr, X, sitewise = TRUE) +expect_true(all(ci1[1:3]==1)) +expect_true(all(ri1[1:3]==1)) + diff --git a/inst/tinytest/test_phyDat.R b/inst/tinytest/test_phyDat.R index b178c1fc..8102777d 100644 --- a/inst/tinytest/test_phyDat.R +++ b/inst/tinytest/test_phyDat.R @@ -4,7 +4,7 @@ data(chloroplast) set.seed(42) tree <- rtree(10) codon_align <- simSeq(tree, l=100, type = "CODON") - +user_align <- simSeq(tree, l=100, type = "USER", levels=c(0:9,"-")) phy_matrix <- as.character(Laurasiatherian) phy_df <- as.data.frame(Laurasiatherian) @@ -87,14 +87,39 @@ unlink("tmp1.nex") write.phyDat(chloroplast, "tmp2.txt") expect_true(inherits(chloro <- read.phyDat("tmp2.txt", type="AA"), "phyDat")) -expect_equal(chloro, chloroplast) # changed to toupper +expect_equal(chloro, chloroplast) unlink("tmp2.txt") + write.phyDat(chloroplast, "tmp.fas", format="fasta") expect_true(inherits(chloro_fas <- read.phyDat("tmp.fas", type="AA", format = "fasta"), "phyDat")) -expect_equal(chloro_fas, chloroplast) # changed to toupper +expect_equal(chloro_fas, chloroplast) unlink("tmp.fas") +write.phyDat(chloroplast, "tmp2.nex", format="nexus") +expect_true(inherits(chloro_nex <- read.phyDat("tmp2.nex", type="AA", + format = "nexus"), "phyDat")) +expect_equal(chloro_nex, chloroplast) +unlink("tmp2.nex") + +write.phyDat(user_align, "tmp3.txt") +expect_true(inherits(user2 <- read.phyDat("tmp3.txt", type="USER", + levels=c(0:9,"-"), ambiguity="?"), "phyDat")) +expect_equal(user2, user_align) +unlink("tmp3.txt") + +write.phyDat(user_align, "tmp3.fas", format="fasta") +expect_true(inherits(user_fas <- read.phyDat("tmp3.fas", type="USER", + levels=c(0:9,"-"), ambiguity="?", format = "fasta"), "phyDat")) +expect_equal(user_fas, user_align) +unlink("tmp3.fas") + +write.phyDat(user_align, "tmp3.nex", format="nexus") +expect_true(inherits(user_nex <- read.phyDat("tmp3.nex", type="STANDARD", + format = "nexus"), "phyDat")) +expect_equal(user_nex, user_align, check.attributes = FALSE) +unlink("tmp3.nex") + # test removing duplicated sequences tmp <- as.character(Laurasiatherian) @@ -106,3 +131,18 @@ map2 <- map_duplicates(Laurasiatherian) expect_null(map2) expect_true(inherits(map1, "data.frame")) + +# check gap as state + +Laurasiatherian_gap <- gap_as_state(Laurasiatherian) +chloroplast_gap <- gap_as_state(chloroplast) + +expect_false(has_gap_state(Laurasiatherian)) +expect_true(has_gap_state(Laurasiatherian_gap)) + +expect_false(has_gap_state(chloroplast)) +expect_true(has_gap_state(chloroplast_gap)) + +expect_equal(Laurasiatherian, gap_as_ambiguous(Laurasiatherian_gap)) +expect_equal(chloroplast, gap_as_ambiguous(chloroplast)) + diff --git a/inst/tinytest/test_pmlMix.R b/inst/tinytest/test_pmlMix.R index 69c7bd14..74422ed7 100644 --- a/inst/tinytest/test_pmlMix.R +++ b/inst/tinytest/test_pmlMix.R @@ -121,4 +121,4 @@ fits <- list() for(i in 1:2) fits[[i]] <- pml(tree, X, wMix=.5) fitMixture <- pmlMix( ~ inv, fits, m=2, control=pml.control(maxit = 25, trace=0)) -expect_equal(logLik(fitMixture)[1], ll) +expect_equal(logLik(fitMixture)[1], ll, 1e-3) diff --git a/inst/tinytest/test_pmlPart.R b/inst/tinytest/test_pmlPart.R index b20de0e2..ff7e2daf 100644 --- a/inst/tinytest/test_pmlPart.R +++ b/inst/tinytest/test_pmlPart.R @@ -143,7 +143,7 @@ expect_equal(Q, sp$fits[[1]]$Q, tolerance=5e-4) # weights0 <- 1000*exp(fit0$siteLik) sp <- pmlPart( ~ inv, fit0, weight=W, control = pml.control(trace=0)) expect_equal(logLik(sp)[1], logLik(fit1)[1]*3, tolerance=5e-4 ) - expect_equal(inv, sp$fits[[1]]$inv, tolerance=5e-5) + expect_equal(inv, sp$fits[[1]]$inv, tolerance=5e-4) diff --git a/man/Ancestors.Rd b/man/Ancestors.Rd index 1e61d48e..8f99ea0c 100644 --- a/man/Ancestors.Rd +++ b/man/Ancestors.Rd @@ -24,7 +24,8 @@ mrca.phylo(x, node = NULL, full = FALSE) \arguments{ \item{x}{a tree (a phylo object).} -\item{node}{an integer or a vector of integers corresponding to a node ID} +\item{node}{an integer or character vector (or scalar) corresponding to a +node ID} \item{type}{specify whether to return just direct children / parents or all} diff --git a/man/CI.Rd b/man/CI.Rd index e41e9689..15036ef6 100644 --- a/man/CI.Rd +++ b/man/CI.Rd @@ -10,7 +10,7 @@ CI(tree, data, cost = NULL, sitewise = FALSE) RI(tree, data, cost = NULL, sitewise = FALSE) } \arguments{ -\item{tree}{tree to start the nni search from.} +\item{tree}{a phylogenetic tree, i.e. an object of class \code{phylo}.} \item{data}{A object of class phyDat containing sequences.} @@ -18,6 +18,9 @@ RI(tree, data, cost = NULL, sitewise = FALSE) \item{sitewise}{return CI/RI for alignment or sitewise} } +\value{ +a scalar or vector with the CI/RI vector. +} \description{ \code{CI} and \code{RI} compute the Consistency Index (CI) and Retention Index (RI). @@ -28,6 +31,27 @@ divided by the number of changes required on the tree (parsimony score). The Consistency Index is equal to one if there is no homoplasy. And the Retention Index is defined as \deqn{RI = \frac{MaxChanges - ObsChanges}{MaxChanges - MinChanges}}{RI = (MaxChanges - ObsChanges) / (MaxChanges - MinChanges)} +} +\examples{ +example(as.phylo.formula) +lab <- tr$tip.label +lab[79] <- "Herpestes fuscus" +tr$tip.label <- abbreviateGenus(lab) +X <- matrix(0, 112, 3, dimnames = list(tr$tip.label, c("Canis", "Panthera", + "Canis_Panthera"))) +desc_canis <- Descendants(tr, "Canis")[[1]] +desc_panthera <- Descendants(tr, "Panthera")[[1]] +X[desc_canis, c(1,3)] <- 1 +X[desc_panthera, c(2,3)] <- 1 +col <- rep("black", 112) +col[desc_panthera] <- "red" +col[desc_canis] <- "blue" +X <- phyDat(X, "USER", levels=c(0,1)) +plot(tr, "f", tip.color=col) +# The first two sites are homoplase free! +CI(tr, X, sitewise=TRUE) +RI(tr, X, sitewise=TRUE) + } \seealso{ \code{\link{parsimony}}, \code{\link{pratchet}}, diff --git a/man/add_ci.Rd b/man/add_ci.Rd index c5ac6b5f..9a3c8535 100644 --- a/man/add_ci.Rd +++ b/man/add_ci.Rd @@ -28,6 +28,9 @@ blue.} \item{\dots}{arguments passed to other functions, \code{\link{legend}} or \code{\link{bxp}}.} } +\value{ +Nothing. Function is called for adding to a plot. +} \description{ These are low-level plotting commands to draw the confidence intervals on the node of a tree as rectangles with coloured backgrounds or @@ -50,7 +53,7 @@ add_boxplot(tree, trees, boxwex=.7) } \seealso{ \code{\link{plot.phylo}}, \code{\link{plotBS}}, -\code{\link{add_edge_length}} +\code{\link{add_edge_length}}, \code{\link{maxCladeCred}} } \author{ Emmanuel Paradis, Santiago Claramunt, Joseph Brown, Klaus Schliep diff --git a/man/add_edge_length.Rd b/man/add_edge_length.Rd index c010e8cd..424cba2a 100644 --- a/man/add_edge_length.Rd +++ b/man/add_edge_length.Rd @@ -5,10 +5,11 @@ \title{Assign and compute edge lengths from a sample of trees} \usage{ add_edge_length(tree, trees, fun = function(x) median(na.omit(x)), - rooted = TRUE) + rooted = all(is.rooted(trees))) } \arguments{ -\item{tree}{tree where edge lengths are assigned to.} +\item{tree}{a phylogenetic tree or splitnetwork where edge lengths are +assigned to.} \item{trees}{an object of class multiPhylo, where the average for the edges is computed from.} @@ -19,6 +20,9 @@ is computed from.} observed splits, if TRUE edge lengths are estimated from height for the observed clades.} } +\value{ +The tree with newly assigned edge length. +} \description{ This command can infer some average edge lengths and assign them from a (bootstrap/MCMC) sample. diff --git a/man/allTrees.Rd b/man/allTrees.Rd index 4e45a551..d235c3f0 100644 --- a/man/allTrees.Rd +++ b/man/allTrees.Rd @@ -17,8 +17,8 @@ allTrees(n, rooted = FALSE, tip.label = NULL) an object of class \code{multiPhylo}. } \description{ -\code{allTrees} computes all tree topologies for rooted or unrooted trees -with up to 10 tips. \code{allTrees} returns bifurcating trees. +\code{allTrees} computes all bifurcating tree topologies for rooted or unrooted +trees with up to 10 tips. The number of trees grows fast } \examples{ @@ -31,7 +31,8 @@ par(old.par) } \seealso{ -\code{\link[ape]{rtree}}, \code{\link{nni}} +\code{\link[ape]{rtree}}, \code{\link{nni}}, +\code{\link[ape]{howmanytrees}}, \code{\link{dfactorial}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/ancestral.pml.Rd b/man/ancestral.pml.Rd index 96c59f2b..b84f3a60 100644 --- a/man/ancestral.pml.Rd +++ b/man/ancestral.pml.Rd @@ -1,80 +1,83 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ancestral_pml.R +% Please edit documentation in R/ancestral.R \name{ancestral.pml} \alias{ancestral.pml} +\alias{as.phyDat.ancestral} \alias{ancestral.pars} \alias{pace} -\alias{plotAnc} \title{Ancestral character reconstruction.} \usage{ -ancestral.pml(object, type = "marginal", return = "prob") +ancestral.pml(object, type = "marginal", ...) -ancestral.pars(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), - cost = NULL, return = "prob") +\method{as.phyDat}{ancestral}(x, ...) -pace(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, - return = "prob") +ancestral.pars(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), + cost = NULL, ...) -plotAnc(tree, data, i = 1, site.pattern = TRUE, col = NULL, - cex.pie = par("cex"), pos = "bottomright", ...) +pace(tree, data, type = c("MPR", "ACCTRAN", "POSTORDER"), cost = NULL, ...) } \arguments{ \item{object}{an object of class pml} \item{type}{method used to assign characters to internal nodes, see details.} -\item{return}{return a \code{phyDat} object or matrix of probabilities.} +\item{\dots}{Further arguments passed to or from other methods.} + +\item{x}{an object of class ancestral} \item{tree}{a tree, i.e. an object of class pml} \item{data}{an object of class phyDat} \item{cost}{A cost matrix for the transitions between two states.} - -\item{i}{plots the i-th site pattern of the \code{data}.} - -\item{site.pattern}{logical, plot i-th site pattern or i-th site} - -\item{col}{a vector containing the colors for all possible states.} - -\item{cex.pie}{a numeric defining the size of the pie graphs} - -\item{pos}{a character string defining the position of the legend} - -\item{\dots}{Further arguments passed to or from other methods.} } \value{ -%A matrix containing the the estimates character states. An object -of class "phyDat", containing the ancestral states of all nodes. +An object of class ancestral. This is a list containing the tree with +node labels, the original alignment as an \code{phyDat} object, a +\code{data.frame} containing the probabilities belonging to a state for all +(internal nodes) and the most likely state. } \description{ Marginal reconstruction of the ancestral character states. } \details{ The argument "type" defines the criterion to assign the internal nodes. For -\code{ancestral.pml} so far "ml" and (empirical) "bayes" and for +\code{ancestral.pml} so far "ml and marginal (empirical) "bayes" and for \code{ancestral.pars} "MPR" and "ACCTRAN" are possible. +The function return a list containing the tree with node labels, the original +alignment as an \code{phyDat} object, a data.frame containing the +probabilities belonging to a state for all (internal nodes) and the most +likely state. For parsimony and nucleotide data the most likely state might +be ambiguous. For ML this is very unlikely to be the case. + +If the input tree does not contain unique node labels the function +\code{ape::MakeNodeLabel} is used to create them. + With parsimony reconstruction one has to keep in mind that there will be often no unique solution. +The functions use the node labels of the provided tree (also if part of the +\code{pml} object) if these are unique. Otherwise the function +\code{ape::MakeNodeLabel} is used to create them. + For further details see vignette("Ancestral"). } \examples{ example(NJ) +# generate node labels to ensure plotting will work +tree <- makeNodeLabel(tree) fit <- pml(tree, Laurasiatherian) anc.ml <- ancestral.pml(fit, type = "ml") anc.p <- ancestral.pars(tree, Laurasiatherian) -\dontrun{ -require(seqLogo) -seqLogo( t(subset(anc.ml, 48, 1:20)[[1]]), ic.scale=FALSE) -seqLogo( t(subset(anc.p, 48, 1:20)[[1]]), ic.scale=FALSE) -} -# plot the first site pattern -plotAnc(tree, anc.ml, 1) +# plot ancestral sequences at the root +plotSeqLogo( anc.ml, 48, 1, 20) +plotSeqLogo( anc.p, 48, 1, 20) +# plot the first character +plotAnc(anc.ml) # plot the third character -plotAnc(tree, anc.ml, attr(anc.ml, "index")[3]) +plotAnc(anc.ml, 3) } \references{ @@ -89,7 +92,9 @@ Press, Oxford. } \seealso{ \code{\link{pml}}, \code{\link{parsimony}}, \code{\link[ape]{ace}}, -\code{\link[ape]{root}} +\code{\link{plotAnc}}, \code{\link{ltg2amb}}, \code{\link{latag2n}}, +\code{\link{gap_as_state}}, \code{\link[ape]{root}}, +\code{\link[ape]{makeNodeLabel}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/as.networx.Rd b/man/as.networx.Rd index 7accd4f9..18b1f0a6 100644 --- a/man/as.networx.Rd +++ b/man/as.networx.Rd @@ -23,6 +23,9 @@ splits (may excludes splits).} \item{coord}{add coordinates of the nodes, allows to reproduce the plot.} } +\value{ +an object of class \code{networx}. +} \description{ \code{as.networx} convert \code{splits} objects into a \code{networx} object. And most important there exists a generic \code{plot} function to diff --git a/man/as.phyDat.Rd b/man/as.phyDat.Rd index 18057479..afba3199 100644 --- a/man/as.phyDat.Rd +++ b/man/as.phyDat.Rd @@ -12,9 +12,12 @@ \alias{as.phyDat} \alias{as.phyDat.factor} \alias{as.phyDat.DNAbin} +\alias{as.phyDat.AAbin} \alias{as.phyDat.alignment} \alias{phyDat2alignment} \alias{as.phyDat.MultipleAlignment} +\alias{as.phyDat.AAStringSet} +\alias{as.phyDat.DNAStringSet} \alias{as.character.phyDat} \alias{as.data.frame.phyDat} \alias{as.DNAbin.phyDat} @@ -30,12 +33,18 @@ as.phyDat(x, ...) \method{as.phyDat}{DNAbin}(x, ...) +\method{as.phyDat}{AAbin}(x, ...) + \method{as.phyDat}{alignment}(x, type = "DNA", ...) phyDat2alignment(x) \method{as.phyDat}{MultipleAlignment}(x, ...) +\method{as.phyDat}{AAStringSet}(x, ...) + +\method{as.phyDat}{DNAStringSet}(x, ...) + \method{as.MultipleAlignment}{phyDat}(x, ...) \method{as.character}{phyDat}(x, allLevels = TRUE, ...) @@ -104,7 +113,7 @@ all.equal(Laurasiatherian, as.phyDat(LauraDNAbin)) \seealso{ [DNAbin()], [as.DNAbin()], \code{\link{baseFreq}}, \code{\link{glance.phyDat}}, -\code{\link{read.dna}}, \code{\link{read.aa}}, \code{\link{read.nexus.data}} +\code{\link{read.dna}}, \code{\link{read.nexus.data}} and the chapter 1 in the \code{vignette("phangorn-specials", package="phangorn")} and the example of \code{\link{pmlMix}} for the use of \code{allSitePattern} diff --git a/man/bab.Rd b/man/bab.Rd index 853233f1..923d0cf2 100644 --- a/man/bab.Rd +++ b/man/bab.Rd @@ -5,7 +5,7 @@ \alias{BranchAndBound} \title{Branch and bound for finding all most parsimonious trees} \usage{ -bab(data, tree = NULL, trace = 1, ...) +bab(data, tree = NULL, trace = 0, ...) } \arguments{ \item{data}{an object of class phyDat.} diff --git a/man/baseFreq.Rd b/man/baseFreq.Rd index 970d5082..5ba3339e 100644 --- a/man/baseFreq.Rd +++ b/man/baseFreq.Rd @@ -3,11 +3,14 @@ \name{baseFreq} \alias{baseFreq} \alias{glance.phyDat} +\alias{composition_test} \title{Summaries of alignments} \usage{ baseFreq(obj, freq = FALSE, all = FALSE, drop.unused.levels = FALSE) \method{glance}{phyDat}(x, ...) + +composition_test(obj) } \arguments{ \item{obj, x}{as object of class phyDat} @@ -31,6 +34,8 @@ contrast.} \code{baseFreq} computes the frequencies (absolute or relative) of the states from a sample of sequences. \code{glance} computes some useful information about the alignment. +\code{composition\_test} computes a \eqn{\chi^2}-test testing if the state +composition for a species differs. } \examples{ @@ -43,6 +48,7 @@ baseFreq(Laurasiatherian, freq=TRUE) baseFreq(chloroplast) glance(Laurasiatherian) glance(chloroplast) +composition_test(Laurasiatherian)[1:10,] } \seealso{ \code{\link{phyDat}, \link{base.freq}, \link{glance}} diff --git a/man/coalSpeciesTree.Rd b/man/coalSpeciesTree.Rd index 2e7e05c4..878be9ed 100644 --- a/man/coalSpeciesTree.Rd +++ b/man/coalSpeciesTree.Rd @@ -27,6 +27,13 @@ et al. (2010) from the element wise minima of the cophenetic matrices of the gene trees. It extends \code{speciesTree} in ape as it allows that have several individuals per gene tree. } +\examples{ +## example in Liu et al. (2010) +tr1 <- read.tree(text = "(((B:0.05,C:0.05):0.01,D:0.06):0.04,A:0.1);") +tr2 <- read.tree(text = "(((A:0.07,C:0.07):0.02,D:0.09):0.03,B:0.12);") +TR <- c(tr1, tr2) +sp_tree <- coalSpeciesTree(TR) +} \references{ Liu, L., Yu, L. and Pearl, D. K. (2010) Maximum tree: a consistent estimator of the species tree. \emph{Journal of Mathematical diff --git a/man/consensusNet.Rd b/man/consensusNet.Rd index 225e169e..001f15f3 100644 --- a/man/consensusNet.Rd +++ b/man/consensusNet.Rd @@ -31,7 +31,7 @@ set.seed(1) bs <- bootstrap.phyDat(Laurasiatherian, FUN = function(x)nj(dist.hamming(x)), bs=50) cnet <- consensusNet(bs, .3) -plot(cnet) +plot(cnet, angle=-60, direction="axial") \dontrun{ library(rgl) open3d() diff --git a/man/cophenetic.networx.Rd b/man/cophenetic.networx.Rd index 1adc540c..91da6119 100644 --- a/man/cophenetic.networx.Rd +++ b/man/cophenetic.networx.Rd @@ -18,6 +18,10 @@ labels (as given by the element \code{tip.label} of the argument \code{x}). \code{cophenetic.networx} computes the pairwise distances between the pairs of tips from a phylogenetic network using its branch lengths. } +\examples{ +example(neighborNet) +cophenetic(nnet) +} \seealso{ \code{\link[stats]{cophenetic}} for the generic function, \code{neighborNet} to construct a network from a distance matrix diff --git a/man/densiTree.Rd b/man/densiTree.Rd index efd0844c..a6d88b1a 100644 --- a/man/densiTree.Rd +++ b/man/densiTree.Rd @@ -4,11 +4,12 @@ \alias{densiTree} \title{Plots a densiTree.} \usage{ -densiTree(x, type = "cladogram", alpha = 1/length(x), consensus = NULL, - direction = "rightwards", optim = FALSE, scaleX = FALSE, col = 1, - width = 1, lty = 1, cex = 0.8, font = 3, tip.color = 1, adj = 0, - srt = 0, underscore = FALSE, label.offset = 0, scale.bar = TRUE, - jitter = list(amount = 0, random = TRUE), ...) +densiTree(x, type = "phylogram", ..., alpha = 1/length(x), + consensus = NULL, direction = "rightwards", optim = FALSE, + scaleX = FALSE, col = 1, width = 1, lty = 1, cex = 0.8, font = 3, + tip.color = 1, adj = 0, srt = 0, underscore = FALSE, + label.offset = 0, scale.bar = TRUE, jitter = list(amount = 0, random = + TRUE), tip.dates = NULL, xlim = NULL, ylim = NULL) } \arguments{ \item{x}{an object of class \code{multiPhylo}.} @@ -16,6 +17,8 @@ densiTree(x, type = "cladogram", alpha = 1/length(x), consensus = NULL, \item{type}{a character string specifying the type of phylogeny, so far "cladogram" (default) or "phylogram" are supported.} +\item{\dots}{further arguments to be passed to plot.} + \item{alpha}{parameter for semi-transparent colors.} \item{consensus}{A tree or character vector which is used to define the order @@ -61,7 +64,14 @@ of the phylogeny and their corresponding labels.} \item{jitter}{allows to shift trees. a list with two arguments: the amount of jitter and random or equally spaced (see details below)} -\item{\dots}{further arguments to be passed to plot.} +\item{tip.dates}{A named vector of sampling times associated with the tips.} + +\item{xlim}{the x limits of the plot.} + +\item{ylim}{the y limits of the plot.} +} +\value{ +\code{densiTree} returns silently x. } \description{ An R function to plot trees similar to those produced by DensiTree. @@ -108,7 +118,7 @@ trees \emph{Bioinformatics}, \bold{26 (10)}, 1372-1373. } \seealso{ \code{\link{plot.phylo}}, \code{\link{plot.networx}}, -\code{\link{jitter}} +\code{\link{jitter}}, \code{\link{rtt}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/designTree.Rd b/man/designTree.Rd index c14e809c..30a62297 100644 --- a/man/designTree.Rd +++ b/man/designTree.Rd @@ -10,7 +10,7 @@ \title{Compute a design matrix or non-negative LS} \usage{ designTree(tree, method = "unrooted", sparse = FALSE, tip.dates = NULL, - ...) + calibration = NULL, ...) nnls.tree(dm, tree, method = c("unrooted", "ultrametric", "tipdated"), rooted = NULL, trace = 1, weight = NULL, balanced = FALSE, @@ -18,9 +18,9 @@ nnls.tree(dm, tree, method = c("unrooted", "ultrametric", "tipdated"), nnls.phylo(x, dm, method = "unrooted", trace = 0, ...) -nnls.splits(x, dm, trace = 0) +nnls.splits(x, dm, trace = 0, eps = 1e-08) -nnls.networx(x, dm) +nnls.networx(x, dm, eps = 1e-08) designSplits(x, splits = "all", ...) } @@ -31,7 +31,11 @@ designSplits(x, splits = "all", ...) \item{sparse}{return a sparse design matrix.} -\item{tip.dates}{a vector of sampling times associated to the tips of tree.} +\item{tip.dates}{a named vector of sampling times associated to the tips of +the tree.} + +\item{calibration}{a named vector of calibration times associated to nodes of +the tree.} \item{\dots}{further arguments, passed to other methods.} @@ -49,6 +53,8 @@ minimized.} \item{x}{number of taxa.} +\item{eps}{minimum edge length (default s 1e-8).} + \item{splits}{one of "all", "star".} } \value{ diff --git a/man/discrete.gamma.Rd b/man/discrete.gamma.Rd index 877e9061..a1f889e9 100644 --- a/man/discrete.gamma.Rd +++ b/man/discrete.gamma.Rd @@ -87,7 +87,7 @@ par(old.par) } \seealso{ -\code{\link{pml.fit}, \link{stepfun}, link{pgamma}, link{pbeta}}, +\code{\link{pml.fit}, \link{stepfun}, \link{pgamma}, \link{pbeta}}, } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/dna2codon.Rd b/man/dna2codon.Rd index e6471bf9..f9077f4c 100644 --- a/man/dna2codon.Rd +++ b/man/dna2codon.Rd @@ -3,11 +3,14 @@ \name{dna2codon} \alias{dna2codon} \alias{codon2dna} +\alias{dna2aa} \title{Translate nucleic acid sequences into codons} \usage{ dna2codon(x, codonstart = 1, code = 1, ambiguity = "---", ...) codon2dna(x) + +dna2aa(x, codonstart = 1, code = 1) } \arguments{ \item{x}{An object containing sequences.} @@ -29,7 +32,8 @@ The functions return an object of class \code{phyDat}. } \description{ The function transforms \code{dna2codon} DNA sequences to codon sequences, -\code{codon2dna} transform the other way. +\code{codon2dna} transform the other way. \code{dna2codon} translates +nucleotide to amino acids using the function \code{\link{trans}}. } \details{ The following genetic codes are described here. The number preceding each diff --git a/man/gap_as_state.Rd b/man/gap_as_state.Rd new file mode 100644 index 00000000..b2347a88 --- /dev/null +++ b/man/gap_as_state.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gap_as_state.R +\name{gap_as_state} +\alias{gap_as_state} +\alias{gap_as_ambiguous} +\alias{has_gap_state} +\title{Treat gaps as a state} +\usage{ +gap_as_state(obj, gap = "-", ambiguous = "?") + +gap_as_ambiguous(obj, gap = "-") + +has_gap_state(obj) +} +\arguments{ +\item{obj}{An object of class phyDat.} + +\item{gap}{a character which codes for the gaps (default is "-").} + +\item{ambiguous}{a character which codes for the ambiguous state} +} +\value{ +The functions return an object of class \code{phyDat}. +} +\description{ +The function \code{gap_as_state} changes the contrast of an phyDat object to +treat as its own state. Internally \code{phyDat} are stored similar to a +\code{factor} objects and only the contrast matrix and some attributes +change. +} +\examples{ +data(Laurasiatherian) +tmp <- gap_as_state(Laurasiatherian) +contr <- attr(tmp, "contrast") +rownames(contr) <- attr(tmp, "allLevels") +contr +} +\seealso{ +\code{\link{phyDat}}, \code{\link{lt2amb}}, \code{\link{latag2n}}, +\code{\link{ancestral.pml}}, \code{\link{gap_as_state}} +} +\author{ +Klaus Schliep \email{klaus.schliep@gmail.com} +} +\keyword{cluster} diff --git a/man/identify.networx.Rd b/man/identify.networx.Rd index 33c08eae..4e27c39e 100644 --- a/man/identify.networx.Rd +++ b/man/identify.networx.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/networx.R +% Please edit documentation in R/plot_networx.R \name{identify.networx} \alias{identify.networx} \title{Identify splits in a network} diff --git a/man/image.phyDat.Rd b/man/image.phyDat.Rd index 8917463c..637c9e1b 100644 --- a/man/image.phyDat.Rd +++ b/man/image.phyDat.Rd @@ -11,11 +11,19 @@ \item{...}{further arguments passed to or from other methods.} } +\value{ +Nothing. The function is called for plotting. +} \description{ This function plots an image of an alignment of sequences. } \details{ A wrapper for using \code{\link{image.DNAbin}} and \code{\link{image.AAbin}}. +Codons triplets are handled as nucleotide sequences. +} +\examples{ +data("chloroplast") +image(chloroplast[, 1:50], scheme="Clustal", show.aa = TRUE) } \seealso{ \code{\link{image.DNAbin}}, \code{\link{image.AAbin}} diff --git a/man/ltg2amb.Rd b/man/ltg2amb.Rd new file mode 100644 index 00000000..f6e68f24 --- /dev/null +++ b/man/ltg2amb.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lt2amb.R +\name{ltg2amb} +\alias{ltg2amb} +\title{Replace leading and trailing alignment gaps with an ambiguous state} +\usage{ +ltg2amb(x, amb = ifelse(attr(x, "type") == "DNA", "N", "?"), gap = "-") +} +\arguments{ +\item{x}{an object of class \code{phyDat}.} + +\item{amb}{character of the ambiguous state t replace the gaps.} + +\item{gap}{gap parameter to replace.} +} +\value{ +returns an object of class \code{phyDat}. +} +\description{ +Substitutes leading and trailing alignment gaps in aligned sequences into N +(i.e., A, C, G, or T) or ?. The gaps in the middle of the sequences are left +unchanged. +} +\examples{ +x <- phyDat(matrix(c("-", "A", "G", "-", "T", "C"), 2, 3)) +y <- ltg2amb(x) +image(x) +image(y) +} +\seealso{ +\code{\link{latag2n}}, \code{\link{ancestral.pml}}, +\code{\link{gap_as_state}} +} +\keyword{cluster} diff --git a/man/maxCladeCred.Rd b/man/maxCladeCred.Rd index 58c08e53..1de7ead8 100644 --- a/man/maxCladeCred.Rd +++ b/man/maxCladeCred.Rd @@ -30,16 +30,19 @@ credibility or a numeric vector of clade credibilities for each tree. \description{ \code{maxCladeCred} computes the maximum clade credibility tree from a sample of trees. +So far just the best tree is returned. No annotations or transformations of +edge length are performed and the edge length are taken from the tree. } \details{ -So far just the best tree is returned. No annotations or transformations of -edge length are performed. - If a list of partition is provided then the clade credibility is computed for the trees in x. \code{allCompat} returns a 50\% majority rule consensus tree with added -compatible splits similar to the option allcompat in MrBayes. +compatible splits similar to the option allcompat in MrBayes. This tree has +no edge length. + +\code{\link{add_edge_length}} can be used to add edge lengths computed from +the sample of trees. } \examples{ @@ -60,6 +63,14 @@ plot(strict_consensus, main="Strict consensus tree") plot(majority_consensus, main="Majority consensus tree") plot(all_compat, main="Majority consensus tree with compatible splits") plot(max_clade_cred, main="Maximum clade credibility tree") + +par(mfrow = c(2,1)) +plot(max_clade_cred, main="Edge length from tree") +add_boxplot(max_clade_cred, bs) +max_clade_cred_2 <- add_edge_length(max_clade_cred, bs) +plot(max_clade_cred_2, main="Edge length computed from sample") +add_boxplot(max_clade_cred_2, bs) + par(old.par) # compute clade credibility for trees given a prop.part object @@ -72,7 +83,8 @@ maxCladeCred(c(tree, bs[[1]]), tree=FALSE, part = pp) \seealso{ \code{\link{consensus}}, \code{\link{consensusNet}}, \code{\link{prop.part}}, \code{\link{bootstrap.pml}}, \code{\link{plotBS}}, -\code{\link{transferBootstrap}} +\code{\link{transferBootstrap}}, \code{\link{add_edge_length}}, +\code{\link{add_boxplot}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/midpoint.Rd b/man/midpoint.Rd index e3a98b06..5225c499 100644 --- a/man/midpoint.Rd +++ b/man/midpoint.Rd @@ -6,6 +6,7 @@ \alias{midpoint.phylo} \alias{midpoint.multiPhylo} \alias{pruneTree} +\alias{keep_as_tip} \title{Tree manipulation} \usage{ getRoot(tree) @@ -17,6 +18,8 @@ midpoint(tree, node.labels = "support", ...) \method{midpoint}{multiPhylo}(tree, node.labels = "support", ...) pruneTree(tree, ..., FUN = ">=") + +keep_as_tip(tree, labels) } \arguments{ \item{tree}{an object of class \code{phylo}.} @@ -27,6 +30,8 @@ should labels get 'deleted'?} \item{\dots}{further arguments, passed to other methods.} \item{FUN}{a function evaluated on the nodelabels, result must be logical.} + +\item{labels}{tip and node labels to keep as tip labels in the tree} } \value{ \code{pruneTree} and \code{midpoint} a tree. \code{getRoot} returns @@ -35,13 +40,16 @@ the root node. \description{ \code{midpoint} performs midpoint rooting of a tree. \code{pruneTree} produces a consensus tree. -} -\details{ \code{pruneTree} prunes back a tree and produces a consensus tree, for trees already containing nodelabels. It assumes that nodelabels are numerical or character that allows conversion to numerical, it uses -as.numeric(as.character(tree$node.labels)) to convert them. \code{midpoint} -so far does not transform node.labels properly. +as.numeric(as.character(tree$node.labels)) to convert them. +\code{midpoint} by default assumes that node labels contain support values. +This works if support values are computed from splits, but should be +recomputed for clades. +\code{keep_as_tip} takes a list of tips and/or node labels and returns a tree +pruned to those. If node label, then it prunes all descendants of that node +until that internal node becomes a tip. } \examples{ diff --git a/man/parsimony.Rd b/man/parsimony.Rd index e925456c..33bc04e3 100644 --- a/man/parsimony.Rd +++ b/man/parsimony.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ancestral_pml.R, R/fitch64.R, R/parsimony.R, +% Please edit documentation in R/ancestral.R, R/fitch64.R, R/parsimony.R, % R/sankoff.R \name{acctran} \alias{acctran} @@ -67,15 +67,20 @@ found during the search. \code{acctran} returns a tree with edge length according to the ACCTRAN criterion. } \description{ -\code{parsimony} returns the parsimony score of a tree using either the -sankoff or the fitch algorithm. \code{optim.parsimony} tries to find the -maximum parsimony tree using either Nearest Neighbor Interchange (NNI) -rearrangements or sub tree pruning and regrafting (SPR). \code{pratchet} -implements the parsimony ratchet (Nixon, 1999) and is the preferred way to -search for the best tree. \code{random.addition} can be used to produce -starting trees. +\code{pratchet} implements the parsimony ratchet (Nixon, 1999) and is the +preferred way to search for the best parsimony tree. For small number of taxa +the function \code{\link{bab}} can be used to compute all most parsimonious +trees. } \details{ +\code{parsimony} returns the parsimony score of a tree using either the +sankoff or the fitch algorithm. +\code{optim.parsimony} optimizes the topology using either Nearest Neighbor +Interchange (NNI) rearrangements or sub tree pruning and regrafting (SPR) and +is used inside \code{pratchet}. \code{random.addition} can be used to produce +starting trees and is an option for the argument perturbation in +\code{pratchet}. + The "SPR" rearrangements are so far only available for the "fitch" method, "sankoff" only uses "NNI". The "fitch" algorithm only works correct for binary trees. diff --git a/man/phangorn-internal.Rd b/man/phangorn-internal.Rd index e8f1f64c..1c5e438e 100644 --- a/man/phangorn-internal.Rd +++ b/man/phangorn-internal.Rd @@ -1,12 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/RcppExports.R, R/candidate_tree.R, -% R/hash_phylo.R, R/networx.R, R/phangorn-package.R, R/phyDat.R, R/pmlPen.R +% R/hash_phylo.R, R/phangorn-package.R, R/phyDat.R, R/plot_networx.R, +% R/pmlPen.R \name{threshStateC} \alias{threshStateC} \alias{candidate_tree} \alias{hash} -\alias{coords} \alias{phangorn-internal} +\alias{coords} \alias{map_duplicates} \alias{pmlPen} \title{Internal phangorn Functions} @@ -18,10 +19,10 @@ candidate_tree(x, method = c("unrooted", "ultrametric", "tipdated"), hash(x, ...) -coords(obj, dim = "3D") - map_duplicates(x, dist = length(x) < 500, ...) +coords(obj, dim = "3D") + pmlPen(object, lambda, ...) } \description{ diff --git a/man/phyDat.Rd b/man/phyDat.Rd index 6aa47de2..922e7bd7 100644 --- a/man/phyDat.Rd +++ b/man/phyDat.Rd @@ -124,7 +124,7 @@ allSitePattern(5) \seealso{ \code{\link{DNAbin}}, \code{\link{as.DNAbin}}, \code{\link{baseFreq}}, \code{\link{glance.phyDat}}, \code{\link{dna2codon}}, -\code{\link{read.dna}}, \code{\link{read.aa}}, \code{\link{read.nexus.data}} +\code{\link{read.dna}}, \code{\link{read.nexus.data}} and the chapter 1 in the \code{vignette("AdvancedFeatures", package="phangorn")} and the example of \code{\link{pmlMix}} for the use of \code{\link{allSitePattern}}. diff --git a/man/plot.ancestral.Rd b/man/plot.ancestral.Rd new file mode 100644 index 00000000..e6a5bcf2 --- /dev/null +++ b/man/plot.ancestral.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotAnc.R +\name{plotAnc} +\alias{plotAnc} +\alias{plotSeqLogo} +\title{Plot ancestral character on a tree} +\usage{ +plotAnc(x, i = 1, type = "phylogram", ..., col = NULL, cex.pie = 0.5, + pos = "bottomright", scheme = NULL) + +plotSeqLogo(x, node = getRoot(x$tree), start = 1, end = 10, + scheme = "Ape_NT", ...) +} +\arguments{ +\item{x}{an object of class \code{ancestral}.} + +\item{i}{plots the i-th site.} + +\item{type}{a character string specifying the type of phylogeny to be drawn; +it must be one of "phylogram" (the default), "cladogram", "fan", "unrooted", +"radial", "tidy", or any unambiguous abbreviation of these.} + +\item{\dots}{Further arguments passed to or from other methods.} + +\item{col}{a vector containing the colors for all possible states.} + +\item{cex.pie}{a numeric defining the size of the pie graphs.} + +\item{pos}{a character string defining the position of the legend.} + +\item{scheme}{a predefined color scheme. For amino acid options are "Ape_AA", +"Zappo_AA", "Clustal", "Polarity" and "Transmembrane_tendency", for +nucleotides "Ape_NT" and"RY_NT". Names can be abbreviated.} + +\item{node}{to plot for which the probabilities should be plotted.} + +\item{start}{start position to plot.} + +\item{end}{end position to plot.} +} +\value{ +\code{plotAnc} returns silently x. + +\code{plotSeqLogo} returns a ggplot object. +} +\description{ +\code{plotAnc} plots a phylogeny and adds character to the nodes. Either +takes output from \code{ancestral.pars} or \code{ancestral.pml} or from an +alignment where there are node labels in the tree match the constructed +sequences in the alignment. +} +\details{ +For further details see vignette("Ancestral"). +} +\examples{ + +example(NJ) +# generate node labels to ensure plotting will work +tree <- makeNodeLabel(tree) +anc.p <- ancestral.pars(tree, Laurasiatherian) +# plot the third character +plotAnc(anc.p, 3, pos="topright") +plotSeqLogo(anc.p, node="Node10", 1, 25) + +data(chloroplast) +tree <- pratchet(chloroplast, maxit=10, trace=0) +tree <- makeNodeLabel(tree) +anc.ch <- ancestral.pars(tree, chloroplast) +image(as.phyDat(anc.ch)[, 1:25]) +plotAnc(anc.ch, 21, scheme="Ape_AA", pos="topleft") +plotAnc(anc.ch, 21, scheme="Clustal", pos="topleft") +plotSeqLogo(anc.ch, node="Node1", 1, 25, scheme="Clustal") +} +\seealso{ +\code{\link{ancestral.pml}}, \code{\link[ape]{plot.phylo}}, +\code{\link[ape]{image.DNAbin}}, \code{\link[ape]{image.AAbin}} +\code{\link[ggseqlogo]{ggseqlogo}} +} +\author{ +Klaus Schliep \email{klaus.schliep@gmail.com} +} +\keyword{plot} diff --git a/man/plot.networx.Rd b/man/plot.networx.Rd index b10eab0f..ee14cdc5 100644 --- a/man/plot.networx.Rd +++ b/man/plot.networx.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/networx.R +% Please edit documentation in R/plot_networx.R \name{plot.networx} \alias{plot.networx} \title{plot phylogenetic networks} @@ -12,7 +12,8 @@ split.lty = NULL, font = 3, cex = par("cex"), cex.node.label = cex, cex.edge.label = cex, col.node.label = tip.color, col.edge.label = tip.color, font.node.label = font, - font.edge.label = font, underscore = FALSE, ...) + font.edge.label = font, underscore = FALSE, angle = 0, digits = 3, + ...) } \arguments{ \item{x}{an object of class \code{"networx"}} @@ -75,8 +76,17 @@ labels.} \item{underscore}{a logical specifying whether the underscores in tip labels should be written as spaces (the default) or left as are (if TRUE).} +\item{angle}{rotate the plot.} + +\item{digits}{if edge labels are numerical a positive integer indicating how +many significant digits are to be used.} + \item{\dots}{Further arguments passed to or from other methods.} } +\value{ +\code{plot.networx} returns invisibly a list with paramters of the +plot. +} \description{ So far not all parameters behave the same on the the \code{rgl} \code{"3D"} and basic graphic \code{"2D"} device. @@ -95,6 +105,7 @@ tree1 <- rtree(20, rooted=FALSE) sp <- as.splits(rNNI(tree1, n=10)) net <- as.networx(sp) plot(net) +plot(net, direction="axial") \dontrun{ # also see example in consensusNet example(consensusNet) diff --git a/man/plot.pml.Rd b/man/plot.pml.Rd index f066c45f..35a212e5 100644 --- a/man/plot.pml.Rd +++ b/man/plot.pml.Rd @@ -19,8 +19,12 @@ and "downwards".} \item{\dots}{further parameters to be passed to \code{plot.phylo}.} } +\value{ +\code{plot.pml} returns invisibly a list with arguments dexcribing the plot. +For further details see the \code{plot.phylo}. +} \description{ -\code{plot.pml} is a warapper around \code{plot.phylo} with different default +\code{plot.pml} is a wrapper around \code{plot.phylo} with different default values for unrooted, ultrametric and tip dated phylogenies. } \examples{ diff --git a/man/plotBS.Rd b/man/plotBS.Rd index 13226e0b..04d50ea2 100644 --- a/man/plotBS.Rd +++ b/man/plotBS.Rd @@ -1,24 +1,29 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/bootstrap.R +% Please edit documentation in R/plotBS.R \name{plotBS} \alias{plotBS} +\alias{add_support} \title{Plotting trees with bootstrap values} \usage{ -plotBS(tree, BStrees, type = "phylogram", method = "FBP", - bs.col = "black", bs.adj = NULL, digits = 3, p = 0, frame = "none", - ...) +plotBS(tree, trees, type = "phylogram", method = "FBP", bs.col = "black", + bs.adj = NULL, digits = 3, p = 0, frame = "none", tol = 1e-06, + sep = "/", ...) + +add_support(tree, trees, method = "FBP", tol = 1e-08, scale = TRUE, + frame = "none", digits = 3, sep = "/", ...) } \arguments{ \item{tree}{The tree on which edges the bootstrap values are plotted.} -\item{BStrees}{a list of trees (object of class "multiPhylo").} +\item{trees}{a list of trees (object of class "multiPhylo").} \item{type}{the type of tree to plot, one of "phylogram", "cladogram", "fan", "unrooted", "radial" or "none". If type is "none" the tree is returned with the bootstrap values assigned to the node labels.} -\item{method}{either "FBP" the classical bootstrap (default) or "TBE" -(transfer bootstrap)} +\item{method}{either "FBP" the classical bootstrap (default), "TBE" +(transfer bootstrap) or "MCC" for assigning clade credibilities. In case of +"MCC" all trees need to be rooted.} \item{bs.col}{color of bootstrap support labels.} @@ -34,21 +39,28 @@ vertical justification of the bootstrap labels.} around the bootstrap values. This must be one of "none" (the default), "rect" or "circle".} +\item{tol}{a numeric value giving the tolerance to consider a branch length +significantly greater than zero.} + +\item{sep}{seperator between the different methods.} + \item{\dots}{further parameters used by \code{plot.phylo}.} + +\item{scale}{return ratio or percentage.} } \value{ \code{plotBS} returns silently a tree, i.e. an object of class \code{phylo} with the bootstrap values as node labels. The argument -\code{BStrees} is optional and if not supplied the labels supplied +\code{trees} is optional and if not supplied the labels supplied in the \code{node.label} slot will be used. } \description{ \code{plotBS} plots a phylogenetic tree with the bootstrap values assigned to the (internal) edges. It can also used to assign bootstrap values to a -phylogenetic tree. +phylogenetic tree. \code{add_support} adds support values to a plot. } \details{ -\code{plotBS} can either assign the classical Felsenstein’s bootstrap +The functions can either assign the classical Felsenstein’s bootstrap proportions (FBP) (Felsenstein (1985), Hendy & Penny (1985)) or the transfer bootstrap expectation (TBE) of Lemoine et al. (2018). Using the option \code{type=="n"} just assigns the bootstrap values and return the tree @@ -79,9 +91,11 @@ Penny D. and Hendy M.D. (1986) Estimating the reliability of evolutionary trees. \emph{Molecular Biology and Evolution} \bold{3}, 403--417 } \seealso{ -\code{\link{transferBootstrap}}, \code{\link{plot.phylo}}, -\code{\link{maxCladeCred}}, \code{\link{nodelabels}}, -\code{\link{consensus}}, \code{\link{consensusNet}} +\code{\link{plot.phylo}}, \code{\link{add_ci}}, +\code{\link{nodelabels}}, +\code{\link{prop.clades}}, \code{\link{maxCladeCred}}, +\code{\link{transferBootstrap}}, \code{\link{consensus}}, +\code{\link{consensusNet}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/pml.Rd b/man/pml.Rd index 679bbbfc..9d79876b 100644 --- a/man/pml.Rd +++ b/man/pml.Rd @@ -1,9 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelTest.R, R/phylo.R +% Please edit documentation in R/modelTest.R, R/phylo.R, R/pml_generics.R \name{as.pml} \alias{as.pml} \alias{pml} \alias{optim.pml} +\alias{logLik.pml} +\alias{anova.pml} +\alias{vcov.pml} +\alias{print.pml} \title{Likelihood of a tree.} \usage{ as.pml(x, ...) @@ -16,6 +20,14 @@ optim.pml(object, optNni = FALSE, optBf = FALSE, optQ = FALSE, optRooted = FALSE, control = pml.control(), model = NULL, rearrangement = ifelse(optNni, "NNI", "none"), subs = NULL, ratchet.par = ratchet.control(), ...) + +\method{logLik}{pml}(object, ...) + +\method{anova}{pml}(object, ...) + +\method{vcov}{pml}(object, ...) + +\method{print}{pml}(x, ...) } \arguments{ \item{x}{So far only an object of class \code{modelTest}.} diff --git a/man/pml.control.Rd b/man/pml.control.Rd index 5116ac22..faaf81a7 100644 --- a/man/pml.control.Rd +++ b/man/pml.control.Rd @@ -8,7 +8,7 @@ pml.control(epsilon = 1e-08, maxit = 10, trace = 1, tau = 1e-08, statefreq = "empirical") -ratchet.control(iter = 20L, maxit = 200L, minit = 50L, prop = 1/2, +ratchet.control(iter = 20L, maxit = 200L, minit = 100L, prop = 1/2, rell = TRUE, bs = 1000L) } \arguments{ @@ -39,8 +39,8 @@ A list with components named as the arguments for controlling the fitting process. } \description{ -Auxiliary functions for \code{\link{optim.pml}} fitting. Use it to construct -a \code{control} or \code{ratchet.par} argument. +Auxiliary functions for providing \code{\link{optim.pml}, \link{pml_bb}} +fitting. Use it to construct a \code{control} or \code{ratchet.par} argument. } \details{ \code{pml.control} controls the fitting process. \code{epsilon} and diff --git a/man/pml.fit.Rd b/man/pml.fit.Rd index 8a3df6a1..53cb2f44 100644 --- a/man/pml.fit.Rd +++ b/man/pml.fit.Rd @@ -80,6 +80,16 @@ the package coalescentMCMC, but are not intended for end user. Most of the functions call C code and are far less forgiving if the import is not what they expect than \code{pml}. } +\examples{ +data(Laurasiatherian) +tree <- NJ(dist.ml(Laurasiatherian)) +bf <- rep(0.25, 4) +eig <- edQt() +pml.init(Laurasiatherian) +pml.fit(tree, Laurasiatherian, bf=bf, eig=eig) +pml.free() +pml(tree, Laurasiatherian) |> logLik() +} \references{ Felsenstein, J. (1981) Evolutionary trees from DNA sequences: a maximum likelihood approach. \emph{Journal of Molecular Evolution}, diff --git a/man/pml_bb.Rd b/man/pml_bb.Rd index c099dd84..333386f2 100644 --- a/man/pml_bb.Rd +++ b/man/pml_bb.Rd @@ -56,10 +56,10 @@ Currently very experimental and likely to change. } \examples{ -\dontrun{ data(woodmouse) -tmp <- pml_bb(woodmouse) +tmp <- pml_bb(woodmouse, model="HKY+I", rearrangement="NNI") +\dontrun{ data(Laurasiatherian) mt <- modelTest(Laurasiatherian) fit <- pml_bb(mt) diff --git a/man/read.aa.Rd b/man/read.aa.Rd deleted file mode 100644 index 347ceb22..00000000 --- a/man/read.aa.Rd +++ /dev/null @@ -1,53 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/phyDat.R -\name{read.aa} -\alias{read.aa} -\title{Read Amino Acid Sequences in a File} -\usage{ -read.aa(file, format = "interleaved", skip = 0, nlines = 0, - comment.char = "#", seq.names = NULL) -} -\arguments{ -\item{file}{a file name specified by either a variable of mode character, or -a double-quoted string.} - -\item{format}{a character string specifying the format of the DNA sequences. -Three choices are possible: \code{"interleaved"}, \code{"sequential"}, or -\code{"fasta"}, or any unambiguous abbreviation of these.} - -\item{skip}{the number of lines of the input file to skip before beginning -to read data.} - -\item{nlines}{the number of lines to be read (by default the file is read -until its end).} - -\item{comment.char}{a single character, the remaining of the line after this -character is ignored.} - -\item{seq.names}{the names to give to each sequence; by default the names -read in the file are used.} -} -\value{ -a matrix of amino acid sequences. -} -\description{ -This function reads amino acid sequences in a file, and returns a matrix -list of DNA sequences with the names of the taxa read in the file as row -names. -} -\references{ -% Anonymous. FASTA format description. % -\url{https://en.wikipedia.org/wiki/FASTA_format} - -Felsenstein, J. (1993) Phylip (Phylogeny Inference Package) version 3.5c. -Department of Genetics, University of Washington. -\url{https://evolution.genetics.washington.edu/phylip/phylip.html} -} -\seealso{ -\code{\link[ape]{read.dna}}, \code{\link[ape]{read.GenBank}}, -\code{\link[phangorn]{phyDat}}, \code{\link[seqinr]{read.alignment}} -} -\author{ -Klaus Schliep \email{klaus.schliep@gmail.com} -} -\keyword{IO} diff --git a/man/read.nexus.partitions.Rd b/man/read.nexus.partitions.Rd index 40bf5a4a..468746c5 100644 --- a/man/read.nexus.partitions.Rd +++ b/man/read.nexus.partitions.Rd @@ -9,14 +9,18 @@ read.nexus.partitions(file, return = "list", ...) \arguments{ \item{file}{a file name.} -\item{return}{either return a list where eeach element is a 'phyDat' object +\item{return}{either returns a list where each element is a 'phyDat' object or an object of class 'multiphyDat'} \item{\dots}{Further arguments passed to or from other methods.} } +\value{ +a list where each element is a 'phyDat' object or an object of class +'multiphyDat'. +} \description{ \code{read.nexus.partitions} reads in sequences in NEXUS format and splits -the data according to the charsets givb in the SETS block. +the data according to the charsets given in the SETS block. } \examples{ tree <- rtree(10) diff --git a/man/read.phyDat.Rd b/man/read.phyDat.Rd index e62c4325..aa4710d3 100644 --- a/man/read.phyDat.Rd +++ b/man/read.phyDat.Rd @@ -40,7 +40,7 @@ These functions read and write sequence alignments. \details{ \code{write.phyDat} calls the function \code{\link[ape]{write.dna}} or \code{\link[ape]{write.nexus.data}} and \code{read.phyDat} calls the function -\code{\link[ape]{read.dna}}, \code{read.aa} or \code{read.nexus.data}, so see +\code{\link[ape]{read.dna}} or \code{read.nexus.data}, so see for more details over there. You may import data directly with \code{\link[ape]{read.dna}} or @@ -56,7 +56,7 @@ primates <- read.phyDat(file.path(fdir, "primates.dna"), \url{https://www.ncbi.nlm.nih.gov/blast/fasta.shtml} Felsenstein, J. (1993) Phylip (Phylogeny Inference Package) version 3.5c. Department of Genetics, University of Washington. -\url{https://evolution.genetics.washington.edu/phylip/phylip.html} +\url{https://phylipweb.github.io/phylip/} } \seealso{ \code{\link[ape]{read.dna}}, \code{\link[ape]{read.GenBank}}, diff --git a/man/superTree.Rd b/man/superTree.Rd index d9712024..38325c63 100644 --- a/man/superTree.Rd +++ b/man/superTree.Rd @@ -48,9 +48,9 @@ set.seed(1) bs <- bootstrap.phyDat(Laurasiatherian, FUN = function(x) upgma(dist.hamming(x)), bs=50) -mrp_st <- superTree(bs) +mrp_st <- superTree(bs, minit = 25, maxit=50) plot(mrp_st) -\dontrun{ +\donttest{ rf_st <- superTree(bs, method = "RF") spr_st <- superTree(bs, method = "SPR") } diff --git a/man/transferBootstrap.Rd b/man/transferBootstrap.Rd index 4a8ec1eb..fb714273 100644 --- a/man/transferBootstrap.Rd +++ b/man/transferBootstrap.Rd @@ -4,21 +4,24 @@ \alias{transferBootstrap} \title{Transfer Bootstrap} \usage{ -transferBootstrap(tree, BStrees) +transferBootstrap(tree, trees, phylo = TRUE, scale = TRUE) } \arguments{ \item{tree}{The tree on which edges the bootstrap values are plotted.} -\item{BStrees}{a list of trees (object of class "multiPhylo").} +\item{trees}{a list of trees (object of class "multiPhylo").} + +\item{phylo}{Logical, return a phylogentic tree with support value or a +vector of bootstrap values.} + +\item{scale}{scale the values.} } \value{ -\code{plotBS} returns silently a tree, i.e. an object of class -\code{phylo} with the bootstrap values as node labels. The argument -\code{BSTrees} is optional and if not supplied the labels supplied -in the \code{node.label} slot will be used. +a phylogentic tree (a phylo object) with bootstrap values assigned to +the node labels. } \description{ -\code{transferBootstrap} assignes transfer bootstrap (Lemoine et al. 2018) +\code{transferBootstrap} assigns transfer bootstrap (Lemoine et al. 2018) values to the (internal) edges. } \examples{ @@ -41,7 +44,8 @@ phylogenetic bootstrap in the era of big data. \emph{Nature}, \bold{556(7702)}, 452--456. } \seealso{ -\code{\link{plotBS}}, \code{\link{maxCladeCred}} +\code{\link{plotBS}}, \code{\link{maxCladeCred}}, +\code{\link{drawSupportOnEdges}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/upgma.Rd b/man/upgma.Rd index b03bdfaa..ddf6be05 100644 --- a/man/upgma.Rd +++ b/man/upgma.Rd @@ -1,13 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/distTree.R +% Please edit documentation in R/upgma.R \name{upgma} \alias{upgma} \alias{wpgma} -\title{UPGMA and WPGMA} +\alias{supgma} +\title{UPGMA, WPGMA and sUPGMA} \usage{ upgma(D, method = "average", ...) wpgma(D, method = "mcquitty", ...) + +supgma(D, tip.dates, trace = 0) } \arguments{ \item{D}{A distance matrix.} @@ -17,13 +20,19 @@ unambiguous abbreviation of) one of "ward", "single", "complete", "average", "mcquitty", "median" or "centroid". The default is "average".} \item{\dots}{Further arguments passed to or from other methods.} + +\item{tip.dates}{A named vector of sampling times associated to the tips.} + +\item{trace}{Show output during optimization (see details).} } \value{ A phylogenetic tree of class \code{phylo}. } \description{ -UPGMA and WPGMA clustering. Just a wrapper function around -\code{\link[stats]{hclust}}. +UPGMA and WPGMA clustering. UPGMA and WPGMA are a wrapper function around +\code{\link[stats]{hclust}} returning a \code{phylo} object. +\code{supgma} perform serial sampled UPGMA similar to Drummond and Rodrigo +(2000). } \examples{ @@ -32,10 +41,23 @@ dm <- dist.ml(Laurasiatherian) tree <- upgma(dm) plot(tree) +} +\references{ +Sneath, P. H., & Sokal, R. R. (1973). \emph{Numerical taxonomy. +The principles and practice of numerical classification.} + +Sokal, R. R., & Michener, C. D. (1958). A statistical method for evaluating +systematic relationships. \emph{University of Kansas Scientific Bulletin}, +v. 38. + +Drummond, A., & Rodrigo, A. G. (2000). Reconstructing genealogies of serial +samples under the assumption of a molecular clock using serial-sample UPGMA. +\emph{Molecular Biology and Evolution}, \bold{17(12)}, 1807-1815. } \seealso{ \code{\link{hclust}}, \code{\link{dist.hamming}}, \code{\link{NJ}}, -\code{\link{as.phylo}}, \code{\link{fastme}}, \code{\link{nnls.tree}} +\code{\link{as.phylo}}, \code{\link{fastme}}, \code{\link{nnls.tree}}, +\code{\link{rtt}} } \author{ Klaus Schliep \email{klaus.schliep@gmail.com} diff --git a/man/write.ancestral.Rd b/man/write.ancestral.Rd new file mode 100644 index 00000000..923ee026 --- /dev/null +++ b/man/write.ancestral.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ancestral.R +\name{write.ancestral} +\alias{write.ancestral} +\alias{ancestral} +\alias{print.ancestral} +\title{Export and convenience functions for ancestral reconstructions} +\usage{ +write.ancestral(x, file = "ancestral") + +ancestral(tree, align, prob) + +\method{print}{ancestral}(x, ...) +} +\arguments{ +\item{x}{an object of class ancestral.} + +\item{file}{a file name. File endings are added.} + +\item{tree}{an object of class phylo.} + +\item{align}{an object of class phyDat.} + +\item{prob}{an data.frame containing a matrix of posterior probabilities for +each state and site.} + +\item{...}{Further arguments passed to or from other methods.} +} +\value{ +\code{write.ancestral} returns the input x invisibly. +} +\description{ +\code{write.ancestral} allows to export ancestral reconstructions. It writes +out the tree, a tab delimited text file with the probabilities and the +alignment. \code{ancestral} generates an object of class ancestral. +} +\details{ +This allows also to read in reconstruction made by iqtree to use the +plotting capabilities of R. +} +\examples{ +data(Laurasiatherian) +fit <- pml_bb(Laurasiatherian[,1:100], "JC", rearrangement = "none") +anc_ml <- ancestral.pml(fit) +write.ancestral(anc_ml) +# Can be also results from iqtree +align <- read.phyDat("ancestral_align.fasta") +tree <- read.tree("ancestral_tree.nwk") +df <- read.table("ancestral.state", header=TRUE) +anc_ml_disc <- ancestral(tree, align, df) +plotAnc(anc_ml_disc, 20) +unlink(c("ancestral_align.fasta", "ancestral_tree.nwk", "ancestral.state")) +} +\seealso{ +\code{\link{ancestral.pml}}, \code{\link{plotAnc}} +} diff --git a/man/write.pml.Rd b/man/write.pml.Rd new file mode 100644 index 00000000..a7c1a285 --- /dev/null +++ b/man/write.pml.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pml_generics.R +\name{write.pml} +\alias{write.pml} +\title{Export pml objects} +\usage{ +write.pml(x, file = tempfile(), ...) +} +\arguments{ +\item{x}{an object of class ancestral.} + +\item{file}{a file name. File endings are added.} + +\item{...}{Further arguments passed to or from other methods.} +} +\value{ +\code{write.pml} returns the input x invisibly. +} +\description{ +\code{write.pml} writes out the ML tree and the model parameters. +} +\examples{ +data(woodmouse) +fit <- pml_bb(woodmouse, "JC", rearrangement = "none") +write.pml(fit, "woodmouse") +unlink(c("woodmouse_pml.txt", "woodmouse_tree.nwk")) +} +\seealso{ +\code{\link{ancestral.pml}}, \code{\link{plotAnc}} +} diff --git a/po/R-phangorn.pot b/po/R-phangorn.pot new file mode 100644 index 00000000..3ca8e446 --- /dev/null +++ b/po/R-phangorn.pot @@ -0,0 +1,432 @@ +msgid "" +msgstr "" +"Project-Id-Version: phangorn 3.0.0.0\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2024-02-14 13:59+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#: Densi.R:134 +msgid "x must be of class multiPhylo" +msgstr "" + +#: addConfidences.R:99 treedist.R:244 +msgid "trees have different labels" +msgstr "" + +#: ancestral_pml.R:238 bab.R:278 bab.R:423 baseFreq.R:35 fitch64.R:264 parsimony.R:83 phylo.R:1340 sankoff.R:50 sankoff.R:118 sankoff.R:160 +msgid "data must be of class phyDat" +msgstr "" + +#: ancestral_pml.R:402 +msgid "" +"Node labels are not unique, used makeNodeLabel(tree, ...) to create them!" +msgstr "" + +#: bootstrap.R:214 +msgid "tree has different labels" +msgstr "" + +#: candidate_tree.R:2 +msgid "tau must be >= 0!" +msgstr "" + +#: candidate_tree.R:45 +msgid "Argument tip.dates is missing!" +msgstr "" + +#: clanistic.R:430 +msgid "The variable names have changed" +msgstr "" + +#: clanistic.R:514 +msgid "contrasts are not defined for 0 degrees of freedom" +msgstr "" + +#: codon.R:60 codon.R:79 dist.p.R:73 distSeq.R:67 distSeq.R:115 distSeq.R:234 +msgid "x must be of class phyDat" +msgstr "" + +#: codon.R:61 +msgid "x must be a nucleotide sequence!" +msgstr "" + +#: delta.score.R:61 +msgid "return options are: all, mean, or sd" +msgstr "" + +#: dist.p.R:119 +msgid "Levels of x are not in levels of cost matrix!" +msgstr "" + +#: distTree.R:384 +msgid "tree must be binary" +msgstr "" + +#: distTree.R:588 +msgid "invalid splits method" +msgstr "" + +#: distTree.R:589 +msgid "ambiguous splits method" +msgstr "" + +#: draw_CI.R:2 superTree.R:4 transferBootstrap.R:34 +msgid "trees must be of class multiPhylo" +msgstr "" + +#: draw_CI.R:10 draw_CI.R:123 +msgid "All trees need to be rooted!" +msgstr "" + +#: fitch64.R:253 phylo.R:1334 plotBS.R:112 sankoff.R:157 +msgid "tree must be of class phylo" +msgstr "" + +#: hadamard.R:187 hadamard.R:272 +msgid "Hadamard conjugation works only efficient for n < 24" +msgstr "" + +#: hadamard.R:212 +msgid "obj must be of class phyDat" +msgstr "" + +#: hadamard.R:213 hadamard.R:265 hadamard.R:266 +msgid "Error" +msgstr "" + +#: hadamard.R:221 +msgid "4-state Hadamard conjugation works only efficient for n < 12" +msgstr "" + +#: hash_phylo.R:9 +msgid "x must be rooted" +msgstr "" + +#: linkfun.R:49 +msgid "y values must be 0 <= y <= 1" +msgstr "" + +#: linkfun.R:52 +msgid "non-integer #successes in a binomial glm!" +msgstr "" + +#: linkfun.R:54 +msgid "non-integer counts in a binomial glm!" +msgstr "" + +#: linkfun.R:59 +msgid "for the binomial family, y must be a vector of 0 and 1's" +msgstr "" + +#: linkfun.R:60 +msgid "" +"or a 2 column matrix where col 1 is no. successes and col 2 is no. failures" +msgstr "" + +#: modelTest.R:96 +msgid "Labels in tree and data differ!" +msgstr "" + +#: networx.R:721 networx.R:726 networx.R:731 +msgid "split.color must be same length as splits" +msgstr "" + +#: networx.R:739 +msgid "type='3D' requires the package 'rgl', plotting in '2D' instead!" +msgstr "" + +#: phyDat.R:101 phyDat.R:171 +msgid "Alignments must have same type!" +msgstr "" + +#: phyDat.R:170 +msgid "Alignments have different # of characters!" +msgstr "" + +#: phyDat.R:173 +msgid "Duplicated names!" +msgstr "" + +#: phyDat.R:257 phyDat.R:263 phyDat.R:265 +msgid "subscript out of bounds" +msgstr "" + +#: phyDat.R:386 +msgid "each site contains at least one ambiguous state!" +msgstr "" + +#: phyDat.R:544 +msgid "the first line of the file must contain the dimensions of the data" +msgstr "" + +#: phyDat2.R:94 +msgid "data object must contain taxa names" +msgstr "" + +#: phyDat2.R:127 +msgid "Either argument levels or contrast has to be supplied" +msgstr "" + +#: phyDat2.R:146 phyDat2.R:227 +msgid "" +"Found unknown characters (not supplied in levels). Deleted sites with " +"unknown states." +msgstr "" + +#: phyDat2.R:321 +msgid "Found unknown characters. Deleted sites with unknown states." +msgstr "" + +#: phyDat_conversion.R:301 +msgid "x must be a nucleotide sequence" +msgstr "" + +#: phyDat_conversion.R:309 +msgid "x must be a amino acid sequence" +msgstr "" + +#: phylo.R:491 +msgid "model must be a formula object" +msgstr "" + +#: phylo.R:769 phylo.R:1374 +msgid "F3x4 not available for this data type" +msgstr "" + +#: phylo.R:771 phylo.R:1376 +msgid "F1x4 not available for this data type" +msgstr "" + +#: phylo.R:773 phylo.R:1378 +msgid "F61 not available for this data type" +msgstr "" + +#: phylo.R:992 +msgid "Error Mkv" +msgstr "" + +#: phylo.R:1335 +msgid "tree must have edge weights" +msgstr "" + +#: phylo.R:1336 +msgid "tree must have unique labels!" +msgstr "" + +#: phylo.R:1342 +msgid "tip labels are not in data" +msgstr "" + +#: phylo.R:1658 phylo.R:2093 +msgid "tree must be rooted" +msgstr "" + +#: phylo.R:2046 +msgid "I unrooted the tree" +msgstr "" + +#: phylo.R:2080 +msgid "rooted / unrooted tree needs at least 2 / 3 tips" +msgstr "" + +#: phylo.R:2088 +msgid "" +"You can't optimize edges and rates at the same time, only edges are " +"optimized!" +msgstr "" + +#: phylo.R:2167 +msgid "only one rate class, ignored optGamma" +msgstr "" + +#: phylo.R:2171 +msgid "cannot estimate invariant sites and Mkv model, ignored optInv" +msgstr "" + +#: plotAnc.R:63 +msgid "Tree needs nodelabel, which match the labels of the alignment!" +msgstr "" + +#: plotAnc.R:95 +msgid "Length of color vector differs from number of levels!" +msgstr "" + +#: plotBS.R:14 +msgid "All trees need to be rooted for method 'MCC'!" +msgstr "" + +#: plotBS.R:122 +msgid "" +"You need to supply 'trees' or the tree needs support-values as node.label" +msgstr "" + +#: pmlPart.R:254 +msgid "x must be of class 'multiphyDat' or a list of 'phyDat' objects" +msgstr "" + +#: pmlPen.R:6 +msgid "object has to be of class pmlPart or pmlMix" +msgstr "" + +#: pml_bb.R:84 +msgid "Please supply a model!" +msgstr "" + +#: pml_bb.R:128 +msgid "Could not find model!" +msgstr "" + +#: pml_bb.R:130 +msgid "Error, found several models!" +msgstr "" + +#: pml_bb.R:204 +msgid "Some parameters are unknown" +msgstr "" + +#: pml_control.R:63 +msgid "value of 'epsilon' must be > 0" +msgstr "" + +#: pml_control.R:65 pml_control.R:78 +msgid "maximum number of iterations must be > 0" +msgstr "" + +#: pml_control.R:67 +msgid "tau must be > 0" +msgstr "" + +#: pml_control.R:80 +msgid "minimum number of iterations must be > 0" +msgstr "" + +#: pml_control.R:82 +msgid "number of iterations must be > 0" +msgstr "" + +#: pml_control.R:84 +msgid "proportion of rearrangenemts must be > 0" +msgstr "" + +#: read.nexus.partitions.R:75 +msgid "does not contain Charset!" +msgstr "" + +#: read.nexus.splits.R:317 +msgid "File does not contain network block, return only splits!" +msgstr "" + +#: read.nexus.splits.R:320 +msgid "File does not contain network block!" +msgstr "" + +#: read.nexus.splits.R:438 +msgid "Problem" +msgstr "" + +#: read.phyDat.R:90 +msgid "sequences have different length" +msgstr "" + +#: simSeq.R:99 +msgid "l must be greater than 0!" +msgstr "" + +#: simSeq.R:139 +msgid "levels have to be supplied if type is USER" +msgstr "" + +#: splits.R:126 +msgid "x and y have different labels!" +msgstr "" + +#: splits.R:156 +msgid "names do not match previous names" +msgstr "" + +#: splits.R:279 +msgid "No split object included!" +msgstr "" + +#: splits.R:436 +msgid "obj must be of class splits" +msgstr "" + +#: splits.R:472 +msgid "x must be of class splits" +msgstr "" + +#: splits.R:475 +msgid "y must be of class splits" +msgstr "" + +#: treeManipulation.R:19 +msgid "There are apparently two root edges in your tree" +msgstr "" + +#: treeManipulation.R:133 +msgid "tree needs edge length" +msgstr "" + +#: treeManipulation.R:224 +msgid "no node labels" +msgstr "" + +#: treeManipulation.R:540 +#, c-format +msgid "That would generate %d trees, and take up more than %d MB of memory!" +msgstr "" + +#: treeManipulation.R:544 +msgid "A tree must have at least two taxa." +msgstr "" + +#: treeManipulation.R:547 +msgid "An unrooted tree must have at least three taxa." +msgstr "" + +#: treeManipulation.R:645 +msgid "Can't find supplied node in the labels" +msgstr "" + +#: treeManipulation.R:855 +msgid "one tree has a different number of tips" +msgstr "" + +#: treeManipulation.R:858 +msgid "one tree has different tip labels" +msgstr "" + +#: treeRearrangement.R:117 +msgid "Not enough edges for NNI rearrangements" +msgstr "" + +#: treeRearrangement.R:202 +msgid "k is chosen too big" +msgstr "" + +#: treeRearrangement.R:215 +msgid "trees must be binary" +msgstr "" + +#: treedist.R:176 treedist.R:253 treedist.R:373 treedist.R:526 treedist.R:573 treedist.R:629 +msgid "Some trees are not binary. Result may not what you expect!" +msgstr "" + +#: treedist.R:263 +msgid "number of bipartitions given to C_sprdist are not the same" +msgstr "" + +#: treedist.R:363 treedist.R:621 treedist.R:688 +msgid "one tree is unrooted, unrooted both" +msgstr "" + +#: treedist.R:409 treedist.R:463 treedist.R:521 treedist.R:563 treedist.R:725 treedist.R:773 +msgid "some trees were rooted, unrooted all" +msgstr "" diff --git a/po/phangorn.pot b/po/phangorn.pot new file mode 100644 index 00000000..fc82b5f4 --- /dev/null +++ b/po/phangorn.pot @@ -0,0 +1,12 @@ +msgid "" +msgstr "" +"Project-Id-Version: phangorn 3.0.0.0\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2024-02-14 13:59+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" diff --git a/src/dupAtomMat.cpp b/src/dupAtomMat.cpp index 29390aa6..92f08b75 100644 --- a/src/dupAtomMat.cpp +++ b/src/dupAtomMat.cpp @@ -3,7 +3,7 @@ vecMap intVecMap; vecMap doubleVecMap; -vecMap charsexpVecMap; +vecMap charsexpVecMap; vecMap rawVecMap; // Rbyte is an alias of unsigned char extern "C" { @@ -13,9 +13,9 @@ SEXP grpDupAtomMat(SEXP x, SEXP MARGIN, SEXP fromLast) SEXP out; int* dim; int nGrps; - dim=INTEGER(getAttrib(x, R_DimSymbol)); - out = PROTECT(allocVector(INTSXP, dim[*INTEGER(MARGIN)-1])); - + dim=INTEGER(Rf_getAttrib(x, R_DimSymbol)); + out = PROTECT(Rf_allocVector(INTSXP, dim[*INTEGER(MARGIN)-1])); + switch (TYPEOF(x)) { case REALSXP: nGrps = doubleVecMap.grpDuplicatedMat (REAL(x), dim, dim+1, INTEGER(out), *INTEGER(MARGIN)==1, (bool)(*(LOGICAL(fromLast))) ); @@ -23,7 +23,7 @@ SEXP grpDupAtomMat(SEXP x, SEXP MARGIN, SEXP fromLast) case INTSXP: // factor type is also covered here // if(!inherits(x, "factor")) nGrps = intVecMap.grpDuplicatedMat (INTEGER(x), dim, dim+1, INTEGER(out), *INTEGER(MARGIN)==1, (bool)(*(LOGICAL(fromLast))) ); - // else {;} + // else {;} break; case LGLSXP: nGrps = intVecMap.grpDuplicatedMat (LOGICAL(x), dim, dim+1, INTEGER(out), *INTEGER(MARGIN)==1, (bool)(*(LOGICAL(fromLast))) ); @@ -32,23 +32,23 @@ SEXP grpDupAtomMat(SEXP x, SEXP MARGIN, SEXP fromLast) CharSEXP* charSexpPtr = new CharSEXP [ dim[0]*dim[1] ]; for(int i=dim[0]*dim[1]-1; i>=0; --i) charSexpPtr[i].sexp = STRING_ELT(x, i); - + nGrps = charsexpVecMap.grpDuplicatedMat (charSexpPtr, dim, dim+1, INTEGER(out), *INTEGER(MARGIN)==1, (bool)(*(LOGICAL(fromLast))) ); - + delete[] charSexpPtr; break; } case RAWSXP: nGrps = rawVecMap.grpDuplicatedMat (RAW(x), dim, dim+1, INTEGER(out), *INTEGER(MARGIN)==1, (bool)(*(LOGICAL(fromLast))) ); break; - default: - error("C function 'grpDupAtomMat' only accepts REALSXP, LGLSXP, INTSXP and STRSXP"); +// default: +// error("C function 'grpDupAtomMat' only accepts REALSXP, LGLSXP, INTSXP and STRSXP"); } - + SEXP nLevels; - nLevels = PROTECT(allocVector(INTSXP, 1)); + nLevels = PROTECT(Rf_allocVector(INTSXP, 1)); INTEGER(nLevels)[0] = nGrps; - setAttrib(out, install("nlevels"), nLevels); + Rf_setAttrib(out, Rf_install("nlevels"), nLevels); UNPROTECT(2); return out; } diff --git a/src/lessAndEqual.h b/src/lessAndEqual.h index 24ecec75..9ca42fe5 100644 --- a/src/lessAndEqual.h +++ b/src/lessAndEqual.h @@ -8,9 +8,9 @@ #include -/* - NOTE: R_NaString is a different SEXP than mkChar("NA"), but holding the same string "NA". - We will treat R_NaString to be smaller than every usual string, including mkChar("NA"). +/* + NOTE: R_NaString is a different SEXP than mkChar("NA"), but holding the same string "NA". + We will treat R_NaString to be smaller than every usual string, including mkChar("NA"). Real NaN becomes mkChar("NaN") by as.character(); Real -Inf becomes mkChar("-Inf") by as.character(); Real Inf becomes mkChar("Inf") by as.character(); @@ -20,37 +20,37 @@ class CharSEXP{ public: SEXP sexp; inline bool valid() {return( TYPEOF(sexp) == CHARSXP );} - + CharSEXP(SEXP x) { if (TYPEOF(x) == CHARSXP) sexp = x; - else error("CharSEXP should be initialized with a CHARSXP type object"); +// else error("CharSEXP should be initialized with a CHARSXP type object"); } - + CharSEXP() { - sexp = R_NaString; + sexp = R_NaString; } - - friend inline bool operator< (const CharSEXP& lhs, const CharSEXP& rhs) + + friend inline bool operator< (const CharSEXP& lhs, const CharSEXP& rhs) { if (lhs.sexp == R_NaString) return( rhs.sexp != R_NaString ); if (rhs.sexp == R_NaString) return(false); - return( + return( strcmp(const_cast(CHAR(lhs.sexp)), const_cast(CHAR(rhs.sexp)) )<0 - ); + ); } - - friend inline bool operator== (const CharSEXP& lhs, const CharSEXP& rhs) - { + + friend inline bool operator== (const CharSEXP& lhs, const CharSEXP& rhs) + { return (lhs.sexp == rhs.sexp); // R CHARSXP objects are cached (only one copy per string) } - + }; -/* for general T where operator< and operator== have been implemented; - * this is helpful for +/* for general T where operator< and operator== have been implemented; + * this is helpful for * (1) integers where NA's is just a special integer value; * (2) unsigned char, where NA's is converted to 00; * (3) CharSEXP, where NA's are properly handled by operator<. @@ -63,25 +63,25 @@ template /* double Assumptions: NaN < NA_real_ < -Inf < Finite numbers < Inf */ template <> -inline bool lessThan(const double& lhs, const double& rhs) +inline bool lessThan(const double& lhs, const double& rhs) { if (R_FINITE(lhs) && R_FINITE(rhs)) return lhs< rhs; // probably the most common case (both finite) - - bool rhsTest = R_IsNaN(rhs); // rhs = NaN + + bool rhsTest = R_IsNaN(rhs); // rhs = NaN if (R_IsNaN(lhs)) return !rhsTest; // lhs = NaN - + rhsTest = rhsTest || ISNA(rhs); // rhs <= NA_real_ if (ISNA(lhs)) return !rhsTest; // lhs = NA - + rhsTest = rhsTest || (rhs == R_NegInf); // rhs <= -Inf if (lhs == R_NegInf) return !rhsTest; // lhs = -Inf - - if(rhsTest) return false; // lhs is finite or +Inf but rhs <= -Inf + + if(rhsTest) return false; // lhs is finite or +Inf but rhs <= -Inf return R_FINITE(lhs); // lhs is finite or +Inf but rhs is +Inf } template <> -inline bool equalTo (const double& lhs, const double& rhs) +inline bool equalTo (const double& lhs, const double& rhs) {return( (lhs == rhs) || (ISNA(lhs) && ISNA(rhs)) || diff --git a/src/ml.c b/src/ml.c index d765530e..e7678e04 100644 --- a/src/ml.c +++ b/src/ml.c @@ -248,7 +248,7 @@ void lll3(SEXP dlist, double *eva, double *eve, double *evei, double *el, double SEXP PML0(SEXP dlist, SEXP EL, SEXP G, SEXP NR, SEXP NC, SEXP K, SEXP eig, SEXP bf, SEXP node, SEXP edge, SEXP NTips, SEXP nco, SEXP contrast, SEXP N){ int nr=INTEGER(NR)[0], nc=INTEGER(NC)[0], k=INTEGER(K)[0], i, indLL; int nTips = INTEGER(NTips)[0], *SC; - double *g=REAL(G), *tmp, logScaleEPS; + double *g=REAL(G), *tmp; //, logScaleEPS; SEXP TMP; double *eva, *eve, *evei; eva = REAL(VECTOR_ELT(eig, 0)); @@ -263,8 +263,8 @@ SEXP PML0(SEXP dlist, SEXP EL, SEXP G, SEXP NR, SEXP NC, SEXP K, SEXP eig, SEXP for(i=0; i + + + + + + + + + + + + + + + + + + + + + + + + + + + +t1 +t2 +t3 +t4 + + + + + + + + + + + + + + + + + + + + + + + + + +a +g +c +t +- + + diff --git a/tests/testthat/_snaps/plot_ancestral/seqlogo.svg b/tests/testthat/_snaps/plot_ancestral/seqlogo.svg new file mode 100644 index 00000000..e69de29b diff --git a/tests/testthat/_snaps/plot_networx/plot-networx.svg b/tests/testthat/_snaps/plot_networx/plot-networx.svg new file mode 100644 index 00000000..936397ca --- /dev/null +++ b/tests/testthat/_snaps/plot_networx/plot-networx.svg @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +1 +2 +3 +4 +5 + + diff --git a/tests/testthat/test_plot_ancestral.R b/tests/testthat/test_plot_ancestral.R new file mode 100644 index 00000000..3b79e5b2 --- /dev/null +++ b/tests/testthat/test_plot_ancestral.R @@ -0,0 +1,22 @@ +tree <- read.tree(text = "((t1:1,t2:1):1,(t3:1,t4:1):1);") +tree2 <- read.tree(text = "(((t1:1,t2:1):1,t3:2):1,t4:3);") +dat <- matrix(c("a", "a", + "a", "t", + "t", "a", + "t", "t"), byrow = TRUE, nrow = 4L, + dimnames = list(c("t1", "t2", "t3", "t4"), NULL)) +dna <- phyDat(dat) +fit <- pml(tree, dna) +test_ml <- ancestral.pml(fit, type = "ml") + + +test_that("Pie_plots works", { + pie_ancestral <- function() plotAnc(test_ml) + vdiffr::expect_doppelganger("Pie plots", pie_ancestral) +}) + + +test_that("plotSeqLogo works", { + seq_logo <- function() plotSeqLogo(test_ml) + vdiffr::expect_doppelganger("SeqLogo", seq_logo) +}) diff --git a/tests/testthat/test_plot_networx.R b/tests/testthat/test_plot_networx.R new file mode 100644 index 00000000..14d68a09 --- /dev/null +++ b/tests/testthat/test_plot_networx.R @@ -0,0 +1,6 @@ +net <- as.networx(allCircularSplits(5)) + +test_that("plot.networx works", { + networx_plot <- function() plot(net) + vdiffr::expect_doppelganger("plot.networx", networx_plot) +}) diff --git a/tests/testthat/test_plot_pml.R b/tests/testthat/test_plot_pml.R new file mode 100644 index 00000000..e25e7c86 --- /dev/null +++ b/tests/testthat/test_plot_pml.R @@ -0,0 +1,23 @@ +tree <- read.tree(text = "((t1:1,t2:1):1,(t3:1,t4:1):1);") +tree2 <- read.tree(text = "((t1:1.1,t2:1.1):1,(t3:1,t4:1):1.1);") +dat <- matrix(c("a", "a", + "a", "t", + "t", "a", + "t", "t"), byrow = TRUE, nrow = 4L, + dimnames = list(c("t1", "t2", "t3", "t4"), NULL)) +dna <- phyDat(dat) +fit <- pml(tree, dna) + +test_that("plot.pml works", { + pml_plot <- function() plot(fit) + vdiffr::expect_doppelganger("plot.pml", pml_plot) +}) + + +trees <- c(tree, tree2) |> .compressTipLabel() + +test_that("densiTree works", { + densi_plot <- function() densiTree(trees, type="phylogram", width=2, + jitter=list(amount=.1, random=FALSE), alpha=1) + vdiffr::expect_doppelganger("densiTree", densi_plot) +}) diff --git a/vignettes/AdvancedFeatures.Rmd b/vignettes/AdvancedFeatures.Rmd index 3e73e9c7..0d94ecec 100644 --- a/vignettes/AdvancedFeatures.Rmd +++ b/vignettes/AdvancedFeatures.Rmd @@ -73,7 +73,7 @@ contrast gapsdata3 <- phyDat(data, type="USER", contrast=contrast) gapsdata3 ``` -Here we defined "n" as a state which can be any nucleotide, but not a gap. "-" and "?" can be any state including a gap. +Here we defined "n" as a state which can be any nucleotide, but not a gap. And "?" can be any state including a gap. These data can be used in all functions available in _phangorn_ to compute distance matrices or perform parsimony and maximum likelihood analysis. diff --git a/vignettes/Ancestral.Rmd b/vignettes/Ancestral.Rmd index b00cc890..caafacb8 100644 --- a/vignettes/Ancestral.Rmd +++ b/vignettes/Ancestral.Rmd @@ -18,21 +18,22 @@ vignette: | knitr::opts_chunk$set(fig.width=6, fig.height=4) #, global.par=TRUE options(digits = 4) +suppressPackageStartupMessages(library(phangorn)) ``` - # Introduction These notes describe the ancestral sequence reconstruction using the _phangorn_ package [@Schliep2011]. _phangorn_ provides several methods to estimate ancestral character states with either Maximum Parsimony (MP) or Maximum Likelihood (ML). For more background on all the methods see e.g. [@Felsenstein2004] or [@Yang2006]. # Parsimony reconstructions + To reconstruct ancestral sequences we first load some data and reconstruct a tree: ```{r parsimony} library(phangorn) fdir <- system.file("extdata/trees", package = "phangorn") primates <- read.phyDat(file.path(fdir, "primates.dna"), format = "interleaved") -tree <- pratchet(primates, trace=0) |> acctran(primates) +tree <- pratchet(primates, trace=0) |> acctran(primates) |> makeNodeLabel() parsimony(tree, primates) ``` @@ -46,37 +47,28 @@ anc.mpr <- ancestral.pars(tree, primates, "MPR") All the ancestral reconstructions for parsimony are based on the fitch algorithm and so far only bifurcating trees are allowed. However trees can get pruned afterwards using the function `multi2di` from _ape_. -The `seqLogo` function from the _seqLogo_ package from Bioconductor provides a neat way to show proportions of a nucleotides of ancestral states (see figure 1). - -```{r seqLogo, fig.cap="Fig 1. Ancestral reconstruction for a node.", eval=FALSE} -library(seqLogo) -seqLogo( t(subset(anc.mpr, getRoot(tree), 1:20)[[1]]), ic.scale=FALSE) -``` -![](seqLogo.png) +The `plotSeqLogo` function is a wrapper around the from the _ggseqlogo_ function the the _ggseqlogo_ package [@ggseqlogo] and provides a simple way to show proportions of a nucleotides of ancestral states (see figure 1). -You may need to install _seqLogo_ before -```{r, eval=FALSE} -if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager") -BiocManager::install("seqLogo") +```{r seqLogo, fig.cap="Fig 1. Ancestral reconstruction for a node.", eval=TRUE} +#library(seqLogo) +#seqLogo( t(subset(anc.mpr, getRoot(tree), 1:20)[[1]]), ic.scale=FALSE) +plotSeqLogo(anc.mpr, node=getRoot(tree), 1, 20) ``` - - ```{r MPR, fig.cap="Fig 2. Ancestral reconstruction using MPR."} -plotAnc(tree, anc.mpr, 17) +plotAnc(anc.mpr, 17) title("MPR") ``` ```{r ACCTRAN, fig.cap="Fig 3. Ancestral reconstruction using ACCTRAN."} -plotAnc(tree, anc.acctran, 17) +plotAnc(anc.acctran, 17) title("ACCTRAN") ``` # Likelihood reconstructions -_phangorn_ also offers the possibility to estimate ancestral states using a ML. +_phangorn_ also offers the possibility to estimate ancestral states using ML. The advantages of ML over parsimony is that the reconstruction accounts for different edge lengths. -So far only a marginal construction is implemented (see [@Yang2006]). +So far only a marginal construction is implemented (see [@Yang2006][@Koshi1996]) and no joint reconstruction [@Pupko2000]. ```{r fit_ML} fit <- pml(tree, primates) fit <- optim.pml(fit, model="F81", control = pml.control(trace=0)) @@ -97,18 +89,18 @@ anc.bayes <- ancestral.pml(fit, "bayes") ``` The differences of the two approaches for a specific site (17) are represented in the following figures. ```{r plotML, fig.cap="Fig 4. Ancestral reconstruction the using the maximum likelihood."} -plotAnc(tree, anc.ml, 17) +plotAnc(anc.ml, 17) title("ML") ``` ```{r plotB, fig.cap="Fig 5. Ancestral reconstruction using (empirical) Bayes."} -plotAnc(tree, anc.bayes, 17) +plotAnc(anc.bayes, 17) title("Bayes") ``` # Fitting for discrete comparative data Often have already a phylogeny and only want estimate the ancestral reconstruction for this tree. -This is a common problem in phylogentic comparative methods and we can use the function *ace* in the ape [@Paradis2018], *fitDiscrete* in the geiger [@Pennell2014] or *fitMK* in the phytools [@Revell2012] package. Here we want fit these models using *optim.pml*. +This is a common problem in phylogentic comparative methods and we can use the function *ace* in the ape [@Paradis2018], *fitDiscrete* in the geiger [@Pennell2014] or *fitMK* in the phytools [@Revell2012] package. Here we want to show how to fit these models using *optim.pml*. First we load a tree and create some data. ```{r read_geospiza_data} @@ -151,7 +143,7 @@ all.equal(fit_SYM$logLik, fit_ace$loglik+log(1/3)) ```{r SYM_reconstruction} anc_SYM <- ancestral.pml(fit_SYM, "ml") -plotAnc(bird.orders, anc_SYM) +plotAnc(anc_SYM) ``` More complicated models can be applied using defining the rate matrix as shown in the vignette _Markov models and transition rate matrices_. diff --git a/vignettes/IntertwiningTreesAndNetworks.Rmd b/vignettes/IntertwiningTreesAndNetworks.Rmd index 797c55d8..3551722e 100644 --- a/vignettes/IntertwiningTreesAndNetworks.Rmd +++ b/vignettes/IntertwiningTreesAndNetworks.Rmd @@ -30,7 +30,7 @@ knitr::knit_hooks$set(small.mar=function(before, options, envir){ -*Description:* This script provides examples of the new functions available in the phangorn library to 'intertwine' trees and networks, i.e. compare trees and networks and data transferrance. It also provides a step-by-step guide for users new to R. +*Description:* This script provides examples of the new functions available in the phangorn library to 'intertwine' trees and networks, i.e. compare trees and networks and data transference. It also provides a step-by-step guide for users new to R. *Methodological advancement:* The major advancement in this phangorn update is the introduction of a generic network object with a wide range of related transfer and analysis functions. These new functions provide the first means to directly transfer information amongst a wide range of phylogenetic trees and networks, as well as means to visualize and further analyze this information. This should provide a platform for individuals to easily conduct tree-network comparisons and stimulate further function development by the community. diff --git a/vignettes/Morphological.Rmd b/vignettes/Morphological.Rmd index 76124e73..76c99cc3 100644 --- a/vignettes/Morphological.Rmd +++ b/vignettes/Morphological.Rmd @@ -40,15 +40,15 @@ mm <- read.csv(file.path(fdir, "mites.csv"), row.names = 1) mm_pd <- phyDat(as.matrix(mm), type = "USER", levels = 0:7) ``` The data can then be written into a _nexus_ file: -```{r write nexus, eval=FALSE} +```{r write_nexus, eval=FALSE} write.phyDat(mm_pd, file.path(fdir, "mites.nex"), format = "nexus") ``` Reading in a _nexus_ file is even easier than reading in a _csv_ file: -```{r, read nexus} +```{r, read_nexus} mm_pd <- read.phyDat(file.path(fdir, "mites.nex"), format = "nexus", type = "STANDARD") ``` After reading in the _nexus_ file, we have the states 0:9, but the data only has the states 0:7. Here is one possibility to change the contrast matrix: -``` {r contrast matrix} +``` {r contrast_matrix} contrast <- matrix(data = c(1,0,0,0,0,0,0,0,0, 0,1,0,0,0,0,0,0,0, 0,0,1,0,0,0,0,0,0, @@ -104,17 +104,19 @@ mm_tree_rooted <- root(mm_tree, outgroup = "C._cymba", resolve.root = TRUE, ## Plot trees -With `plotBS`, we can either plot all of the trees with their respective edge support, or we can subset to only get a certain tree. It is also possible to save the plots as _.pdf_ (or various other formats, e.g. svg, png, tiff) file. `digits` is an argument to determine the number of digits shown for the bootstrap values. -```{r plot_trees, eval=FALSE} + +With `plotBS` we plot a tree with their respective edge support. It is also possible to save the plots as _.pdf_ (or various other formats, e.g. svg, png, tiff) file. `digits` is an argument to determine the number of digits shown for the bootstrap values. +```{r plot_trees, eval=FALSE} # subsetting for tree nr. 9 plotBS(mm_tree_rooted[[9]], digits = 2) # save plot as pdf pdf(file = "mm_rooted.pdf") -plotBS(mm_tree_rooted, digits = 2) +plotBS(mm_tree_rooted[[9]], digits = 2) dev.off() ``` diff --git a/vignettes/Trees.Rmd b/vignettes/Trees.Rmd index 7e69eb81..93df697a 100644 --- a/vignettes/Trees.Rmd +++ b/vignettes/Trees.Rmd @@ -226,8 +226,7 @@ write.tree(tree_tfbs, "primates.tree") ## Molecular dating with a strict clock for ultrametric and tipdated phylogenies -When we assume a "molecular clock" phylogenies can be used to infer divergence times [@Zuckerkandl1965]. We implemented a strict clock as described in [@Felsenstein2004], p. 266, allowing to infer ultrametric and tip-dated phylogenies. We need a starting tree that fulfills the assumptions, so either the tree has to be ultrametric, or the constraints given by the tip dates. -For an ultrametric starting tree we can use an UPGMA or WPGMA tree. +When we assume a "molecular clock" phylogenies can be used to infer divergence times [@Zuckerkandl1965]. We implemented a strict clock as described in [@Felsenstein2004], p. 266, allowing to infer ultrametric and tip-dated phylogenies. The function `pml_bb` ensures that the tree is ultrametric, or the constraints given by the tip dates are fulfilled. That differs from the function `optim.pml` where th tree supplied to the function has to fulfill the constraints. In this case for an ultrametric starting tree we can use an UPGMA or WPGMA tree. ```{r strict_primates, echo=TRUE, cache=TRUE} fit_strict <- pml_bb(primates, model="HKY+G(4)", method="ultrametric", rearrangement="NNI", control = pml.control(trace = 0)) @@ -249,16 +248,20 @@ head(dates) ``` Again we use the `pml_bb` function, which optimizes the tree given the constraints of the `tip.dates` vector. ```{r tipdated_fit} -fit_td <- pml_bb(H3N2, model="GTR+G(4)", method="tipdated", tip.dates=dates, +fit_td <- pml_bb(H3N2, model="HKY+I", method="tipdated", tip.dates=dates, rearrangement="NNI", control = pml.control(trace = 0)) +fit_td ``` +While the loglikelihood is lower than for an unrooted tree, we have to keep in mind that rooted trees use less parameters. +In unrooted trees we estimate one edge length parameter for each tree, for ultrametric trees we only estimate a parameter for each internal node and for tipdated trees we have one additional parameter for the rate. The rate is here comparable to the slope fo the tip-to-root regression in programs like *TempEst* [@TempEst]. + + + And at last we plot the tree with a timescale. ```{r tipdated_plot} -plot(fit_td, show.tip.label = FALSE) +plot(fit_td, align.tip.label=TRUE) ``` -While the loglikelihood is lower than for an unrooted tree, we have to keep in mind that rooted trees use less parameters. -In unrooted trees we estimate one edge length parameter for each tree, for ultrametric trees we only estimate a parameter for each internal node and for tipdated trees we have one additional parameter for the rate. \newpage diff --git a/vignettes/phangorn.bib b/vignettes/phangorn.bib index ff511390..c544019b 100644 --- a/vignettes/phangorn.bib +++ b/vignettes/phangorn.bib @@ -838,6 +838,19 @@ @Article{ Mathews2010 } +@article {Pfingstl2023, + author = "Tobias Pfingstl and Iris Bardel-Kahr and Klaus Schliep", + title = "One step closer but still far from solving the puzzle – The phylogeny of marine associated mites (Acari, Oribatida, Ameronothroidea) inferred from morphological and molecular genetic data", + journal = "Contributions to Zoology", + year = "2023", + publisher = "Brill", + address = "Leiden, The Netherlands", + doi = "https://doi.org/10.1163/18759866-bja10043", + pages= "1 - 33" +} + + + @article {Schliep2017, author = "Schliep, Klaus and Potts, Alastair J. and Morrison, David A. and Grimm, Guido W.", title = "Intertwining phylogenetic trees and networks", @@ -1328,3 +1341,37 @@ @article{Ota2000 } +@article{Pupko2000, + author = {Pupko, Tal and Pe, Itsik and Shamir, Ron and Graur, Dan}, + title = "{A Fast Algorithm for Joint Reconstruction of Ancestral Amino Acid Sequences}", + journal = {Molecular Biology and Evolution}, + volume = {17}, + number = {6}, + pages = {890-896}, + year = {2000}, + month = {06}, + issn = {0737-4038}, + doi = {10.1093/oxfordjournals.molbev.a026369}, +} + + +@article{Koshi1996, + author = {Koshi, Jeffrey M. and Goldstein, Richard A.}, + title = "{Probabilistic reconstruction of ancestral protein sequences}", + journal = {Journal of Molecular Evolution}, + volume = {42}, + number = {2}, + pages = {313-320}, + year = {1996}, + doi = {10.1007/BF02198858}, +} + + +@Manual{ggseqlogo, + title = {ggseqlogo: A 'ggplot2' Extension for Drawing Publication-Ready Sequence +Logos}, + author = {Omar Wagih}, + year = {2024}, + note = {R package version 0.2}, + url = {https://CRAN.R-project.org/package=ggseqlogo}, + } diff --git a/vignettes/seqLogo.png b/vignettes/seqLogo.png deleted file mode 100644 index ac48532d..00000000 Binary files a/vignettes/seqLogo.png and /dev/null differ