From ba42574a62539d99f092c93480b31576e6676a44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9otime=20Pralas?=
 <151254073+thpralas@users.noreply.github.com>
Date: Mon, 10 Jun 2024 11:17:57 +0300
Subject: [PATCH] Use Rbiom unifrac implementation instead of runUnifrac (#523)

Co-authored-by: Leo Lahti <leo.lahti@iki.fi>
Co-authored-by: TuomasBorman <tvborm@utu.fi>
Co-authored-by: Tuomas Borman <60338854+TuomasBorman@users.noreply.github.com>
---
 DESCRIPTION                    |   5 +-
 NAMESPACE                      |   5 +-
 NEWS                           |   3 +-
 R/calculateUnifrac.R           | 294 +++++++--------------------------
 man/calculateUnifrac.Rd        |  49 +-----
 tests/testthat/test-5Unifrac.R | 117 ++++++-------
 6 files changed, 119 insertions(+), 354 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index e19ec6b91..35a4c6914 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mia
 Type: Package
-Version: 1.13.20
+Version: 1.13.21
 Authors@R:
     c(person(given = "Felix G.M.", family = "Ernst", role = c("aut"),
              email = "felix.gm.ernst@outlook.com",
@@ -68,7 +68,8 @@ Imports:
     tidyr,
     bluster,
     MatrixGenerics,
-    mediation
+    mediation,
+    rbiom
 Suggests:
     testthat,
     knitr,
diff --git a/NAMESPACE b/NAMESPACE
index 4f7de7b58..d392d1279 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -323,11 +323,7 @@ importFrom(ape,cophenetic.phylo)
 importFrom(ape,drop.tip)
 importFrom(ape,has.singles)
 importFrom(ape,is.rooted)
-importFrom(ape,node.depth)
-importFrom(ape,node.depth.edgelength)
-importFrom(ape,prop.part)
 importFrom(ape,read.tree)
-importFrom(ape,reorder.phylo)
 importFrom(ape,root)
 importFrom(bluster,clusterRows)
 importFrom(decontam,isContaminant)
@@ -344,6 +340,7 @@ importFrom(dplyr,rename)
 importFrom(dplyr,select)
 importFrom(dplyr,tally)
 importFrom(mediation,mediate)
+importFrom(rbiom,unifrac)
 importFrom(rlang,":=")
 importFrom(rlang,sym)
 importFrom(scuttle,sumCountsAcrossFeatures)
diff --git a/NEWS b/NEWS
index adc135ddc..23badbede 100644
--- a/NEWS
+++ b/NEWS
@@ -136,4 +136,5 @@ Changes in version 1.13.x
 + Fix bug in mergeFeaturesByPrevalence
 + new aliases calculateDPCoA to getDPCoA, calculateNMDS to getNMDS, calculateRDA to getRDA, 
 calculateCCA to getCCA
-+ add informative error message in rarefyAssay on assays with strictly-negative values 
\ No newline at end of file
++ add informative error message in rarefyAssay on assays with strictly-negative values 
++ Use rbiom package in unifrac implementation
diff --git a/R/calculateUnifrac.R b/R/calculateUnifrac.R
index 0be2fcf3a..322c12fee 100644
--- a/R/calculateUnifrac.R
+++ b/R/calculateUnifrac.R
@@ -1,8 +1,8 @@
 #' Calculate weighted or unweighted (Fast) Unifrac distance
 #'
-#' This function calculates the (Fast) Unifrac distance for all sample-pairs
+#' This function calculates the Unifrac distance for all sample-pairs
 #' in a \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
-#' object.
+#' object. The function utilizes \code{\link[rbiom:unifrac]{rbiom:unifrac()}}.
 #'
 #' Please note that if \code{calculateUnifrac} is used as a \code{FUN} for
 #' \code{runMDS}, the argument \code{ntop} has to be set to \code{nrow(x)}.
@@ -48,16 +48,6 @@
 #'   considers presence/absence. Default is \code{FALSE}, meaning the
 #'   unweighted-Unifrac distance is calculated for all pairs of samples.
 #'
-#' @param normalized \code{TRUE} or \code{FALSE}: Should the output be
-#'   normalized such that values range from 0 to 1 independent of branch length
-#'   values? Default is \code{TRUE}. Note that (unweighted) \code{Unifrac} is
-#'   always normalized by total branch-length, and so this value is ignored when
-#'   \code{weighted == FALSE}.
-#'
-#' @param BPPARAM A
-#'   \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}
-#'   object specifying whether the Unifrac calculation should be parallelized.
-#'
 #' @param transposed Logical scalar, is x transposed with cells in rows, i.e., 
 #'   is Unifrac distance calculated based on rows (FALSE) or columns (TRUE).
 #'   (By default: \code{transposed = FALSE})
@@ -69,15 +59,6 @@
 #' @references
 #' \url{http://bmf.colorado.edu/unifrac/}
 #'
-#' The main implementation (Fast Unifrac) is adapted from the algorithm's
-#' description in:
-#'
-#' Hamady, Lozupone, and Knight,
-#' ``\href{http://www.nature.com/ismej/journal/v4/n1/full/ismej200997a.html}{Fast
-#' UniFrac:} facilitating high-throughput phylogenetic analyses of microbial
-#' communities including analysis of pyrosequencing and PhyloChip data.'' The
-#' ISME Journal (2010) 4, 17--27.
-#'
 #' See also additional descriptions of Unifrac in the following articles:
 #'
 #' Lozupone, Hamady and Knight, ``Unifrac - An Online Tool for Comparing
@@ -95,22 +76,17 @@
 #'
 #' @export
 #'
-#' @author
-#' Paul J. McMurdie.
-#' Adapted for mia by Felix G.M. Ernst
-#'
 #' @examples
 #' data(esophagus)
 #' library(scater)
 #' calculateUnifrac(esophagus, weighted = FALSE)
 #' calculateUnifrac(esophagus, weighted = TRUE)
-#' calculateUnifrac(esophagus, weighted = TRUE, normalized = FALSE)
 #' # for using calculateUnifrac in conjunction with runMDS the tree argument
 #' # has to be given separately. In addition, subsetting using ntop must
 #' # be disabled
 #' esophagus <- runMDS(esophagus, FUN = calculateUnifrac, name = "Unifrac",
 #'                     tree = rowTree(esophagus),
-#'                     exprs_values = "counts",
+#'                     assay.type = "counts",
 #'                     ntop = nrow(esophagus))
 #' reducedDim(esophagus)
 NULL
@@ -118,23 +94,21 @@ NULL
 #' @rdname calculateUnifrac
 #' @export
 setGeneric("calculateUnifrac", signature = c("x", "tree"),
-           function(x, tree, ... )
-             standardGeneric("calculateUnifrac"))
+            function(x, tree, ... )
+                standardGeneric("calculateUnifrac"))
 
 #' @rdname calculateUnifrac
 #' @export
 setMethod("calculateUnifrac", signature = c(x = "ANY", tree = "phylo"),
-    function(x, tree, weighted = FALSE, normalized = TRUE,
-             BPPARAM = SerialParam(), ...){
+    function(x, tree, weighted = FALSE, ...){
         if(is(x,"SummarizedExperiment")){
-           stop("When providing a 'tree', please provide a matrix-like as 'x'",
+            stop("When providing a 'tree', please provide a matrix-like as 'x'",
                 " and not a 'SummarizedExperiment' object. Please consider ",
                 "combining both into a 'TreeSummarizedExperiment' object.",
                 call. = FALSE) 
         }
         .calculate_distance(x, FUN = runUnifrac, tree = tree,
-                            weighted = weighted, normalized = normalized,
-                            BPPARAM = BPPARAM, ...)
+                            weighted = weighted, ...)
     }
 )
 
@@ -144,10 +118,10 @@ setMethod("calculateUnifrac", signature = c(x = "ANY", tree = "phylo"),
 #'
 #' @export
 setMethod("calculateUnifrac",
-          signature = c(x = "TreeSummarizedExperiment",
-                        tree = "missing"),
+        signature = c(x = "TreeSummarizedExperiment",
+                    tree = "missing"),
     function(x, assay.type = assay_name, assay_name = exprs_values, exprs_values = "counts", 
-             tree_name = "phylo", transposed = FALSE, ...){
+            tree_name = "phylo", transposed = FALSE, ...){
         # Check assay.type and get assay
         .check_assay_present(assay.type, x)
         mat <- assay(x, assay.type)
@@ -191,28 +165,19 @@ setMethod("calculateUnifrac",
         links[ links$whichTree != tree_name, ] <- NA
         # Take only nodeLabs
         links <- links[ , "nodeLab" ]
-        calculateUnifrac(mat, tree = tree, nodeLab = links, ...)
+        res <- calculateUnifrac(mat, tree = tree, nodeLab = links, ...)
+        return(res)
     }
 )
 
-################################################################################
-# Fast Unifrac for R.
-# Adapted from The ISME Journal (2010) 4, 17-27; doi:10.1038/ismej.2009.97
-#
-# adopted from original implementation in phyloseq implemented by
-# Paul J. McMurdie (https://github.com/joey711/phyloseq)
 ################################################################################
 #' @rdname calculateUnifrac
 #'
-#' @importFrom ape prop.part reorder.phylo node.depth node.depth.edgelength
-#' @importFrom utils combn
-#' @importFrom stats as.dist
-#' @importFrom BiocParallel SerialParam register bplapply bpisup bpstart bpstop
-#' @importFrom DelayedArray getAutoBPPARAM setAutoBPPARAM
-#'
+#' @importFrom ape drop.tip
+#' @importFrom rbiom unifrac
 #' @export
-runUnifrac <- function(x, tree, weighted = FALSE, normalized = TRUE,
-                       nodeLab = NULL, BPPARAM = SerialParam(), ...){
+runUnifrac <- function(
+        x, tree, weighted = FALSE, nodeLab = NULL, ...){
     # Check x
     if( !is.matrix(as.matrix(x)) ){
         stop("'x' must be a matrix", call. = FALSE)
@@ -228,25 +193,23 @@ runUnifrac <- function(x, tree, weighted = FALSE, normalized = TRUE,
     if(!.is_a_bool(weighted)){
         stop("'weighted' must be TRUE or FALSE.", call. = FALSE)
     }
-    if(!.is_a_bool(normalized)){
-        stop("'normalized' must be TRUE or FALSE.", call. = FALSE)
-    }
     if(is.null(colnames(x)) || is.null(rownames(x))){
         stop("colnames and rownames must not be NULL", call. = FALSE)
     }
     # nodeLab should be NULL or character vector specifying links between 
     # rows and tree labels
     if( !(is.null(nodeLab) ||
-        (is.character(nodeLab) && length(nodeLab) == nrow(x) &&
-        all(nodeLab[ !is.na(nodeLab) ] %in% c(tree$tip.label)))) ){
-        stop("'nodeLab' must be NULL or character specifying links between ",
-             "abundance table and tree labels.", call. = FALSE)
+            (is.character(nodeLab) && length(nodeLab) == nrow(x) &&
+            all(nodeLab[ !is.na(nodeLab) ] %in% c(tree$tip.label)))) ){
+        stop(
+            "'nodeLab' must be NULL or character specifying links between ",
+            "abundance table and tree labels.", call. = FALSE)
     }
     # check that matrix and tree are compatible
-    if( is.null(nodeLab) && 
-        !all(rownames(x) %in% c(tree$tip.label)) ) {
-        stop("Incompatible tree and abundance table! Please try to provide ",
-             "'nodeLab'.", call. = FALSE)
+    if( is.null(nodeLab) && !all(rownames(x) %in% c(tree$tip.label)) ) {
+        stop(
+            "Incompatible tree and abundance table! Please try to provide ",
+            "'nodeLab'.", call. = FALSE)
     }
     # Merge rows, so that rows that are assigned to same tree node are agglomerated
     # together. If nodeLabs were provided, merge based on those. Otherwise merge
@@ -254,144 +217,52 @@ runUnifrac <- function(x, tree, weighted = FALSE, normalized = TRUE,
     if( is.null(nodeLab) ){
         nodeLab <- rownames(x)
     }
-    # Merge assay
+    # Prune tree if there are nodes that cannot be found from tips or if there
+    # are tips that cannot be found from abundance matrix. It might be
+    # that certain row is linked to internal node or that the tree has extra
+    # tips that do not match with rows (e.g. after subsetting).
+    if( any( !nodeLab %in% tree$tip.label ) ||
+            any( !tree$tip.label %in% nodeLab) ){
+        tree <- .prune_tree(tree, nodeLab)
+        warning("Pruning tree...", call. = FALSE)
+    }
+    # If node labels cannot be found from tips even after pruning, give error.
+    # This kind of tree cannot be used in unifrac since it expects that every
+    # row is linked to tips.
+    if( any( !nodeLab %in% tree$tip.label ) ){
+        stop(
+            "Unifrac cannot be calculated since tree is not compatible. ",
+            "Each row must be linked to tip of the tree.", call. = FALSE)
+    }
+    # Merge assay so that each row represent single tip. It might be that
+    # multiple rows are linked to single tip.
     x <- .merge_assay_by_rows(x, nodeLab, ...)
-    # Modify tree
+    # Modify tree so that it will become rooted.
     tree <- .norm_tree_to_be_rooted(tree, rownames(x))
     # Remove those tips that are not present in the data
     if( any(!tree$tip.label %in% rownames(x)) ){
-        tree <- ape::drop.tip(
+        tree <- drop.tip(
             tree, tree$tip.label[!tree$tip.label %in% rownames(x)])
-        warning("The tree is pruned so that tips that cannot be found from ", 
-                "the abundance matrix are removed.", call. = FALSE)
-    }
-    #
-    old <- getAutoBPPARAM()
-    setAutoBPPARAM(BPPARAM)
-    on.exit(setAutoBPPARAM(old))
-    if (!(bpisup(BPPARAM) || is(BPPARAM, "MulticoreParam"))) {
-        bpstart(BPPARAM)
-        on.exit(bpstop(BPPARAM), add = TRUE)
+        warning(
+            "The tree is pruned so that tips that cannot be found from ", 
+            "the abundance matrix are removed.", call. = FALSE)
     }
-    #
-    # create N x 2 matrix of all pairwise combinations of samples.
-    spn <- utils::combn(colnames(x), 2, simplify = FALSE)
-
-    ########################################
-    # Build the requisite matrices as defined
-    # in the Fast Unifrac article.
-    ########################################
-    ## This only needs to happen once in a call to Unifrac.
-    ## Notice that A and B do not appear in this section.
-    # Begin by building the edge descendants matrix (edge-by-sample)
-    # `edge_array`
-    #
-    # Create a list of descendants, starting from the first internal node (root)
-    ntip <- length(tree$tip.label)
-    # Create a matrix that maps each internal node to its 2 descendants
-    # This matrix doesn't include the tips, so must use node#-ntip to index into
-    # it
-    ## to suppress the warning if the number of node edges is uneven, we add the
-    ## first node again
-    node.desc <- tree$edge[order(tree$edge[,1]),][,2]
-    if(length(node.desc) %% 2 == 1){
-        node.desc <- c(node.desc,node.desc[1])
-    }
-    node.desc <- matrix(node.desc, byrow = TRUE, ncol = 2)
-    # Define the edge_array object
-    # Right now this is a node_array object, each row is a node (including tips)
-    # It will be subset and ordered to match tree$edge later
-    edge_array <- matrix(0, nrow = ntip+tree$Nnode, ncol = ncol(x),
-                         dimnames = list(NULL, sample_names = colnames(x)))
-    # Load the tip counts in directly
-    x_tip <- x[ rownames(x) %in% tree$tip.label, ]
-    edge_array[seq_len(ntip),] <- x_tip
-    # Get a list of internal nodes ordered by increasing depth
-    ord.node <- order(node.depth(tree))[(ntip+1):(ntip+tree$Nnode)]
-    # Loop over internal nodes, summing their descendants to get that nodes
-    # count
-    for(i in ord.node){
-        edge_array[i,] <- colSums(edge_array[node.desc[i-ntip,], , drop=FALSE],
-                                  na.rm = TRUE)
-    }
-    # Keep only those with a parental edge (drops root) and order to match
-    # tree$edge
-    edge_array <- edge_array[tree$edge[,2],]
-    # calculate the sums per sample
-    samplesums <- colSums(x)
-    # Remove unneeded variables.
-    rm(node.desc)
-    ############################################################################
-    # calculate the distances
-    ############################################################################
-    if(weighted){
-        if(!normalized){
-            distlist <- BiocParallel::bplapply(spn,
-                                               unifrac_weighted_not_norm,
-                                               tree = tree,
-                                               samplesums = samplesums,
-                                               edge_array = edge_array,
-                                               BPPARAM = BPPARAM)
-        } else {
-            # This is only relevant to weighted-Unifrac.
-            # For denominator in the normalized distance, we need the age of each
-            # tip.
-            # 'z' is the tree in postorder order used in calls to .C
-            # Descending order of left-hand side of edge (the ancestor to the node)
-            z <- ape::reorder.phylo(tree, order="postorder")
-            # Call phyloseq-internal function that in-turn calls ape's internal
-            # horizontal position function, in C, using the re-ordered phylo object,
-            # `z`
-            tipAges = node.depth.edgelength(tree)
-            # Keep only the tips, and add the tip labels in case `z` order differs
-            # from `tree`
-            tipAges <- tipAges[seq.int(1L, length(tree$tip.label))]
-            names(tipAges) <- z$tip.label
-            # Explicitly re-order tipAges to match x
-            tipAges <- tipAges[rownames(x)]
-            distlist <- BiocParallel::bplapply(spn,
-                                               unifrac_weighted_norm,
-                                               mat = x,
-                                               tree = tree,
-                                               samplesums = samplesums,
-                                               edge_array = edge_array,
-                                               tipAges = tipAges,
-                                               BPPARAM = BPPARAM)
-        }
-    } else {
-        # For unweighted Unifrac, convert the edge_array to an occurrence
-        # (presence/absence binary) array
-        edge_occ <- (edge_array > 0) - 0
-        distlist <- BiocParallel::bplapply(spn,
-                                           unifrac_unweighted,
-                                           tree = tree,
-                                           samplesums = samplesums,
-                                           edge_occ = edge_occ,
-                                           BPPARAM = BPPARAM)
-    }
-    # Initialize UnifracMat with NAs
-    UnifracMat <- matrix(NA_real_, ncol(x), ncol(x))
-    rownames(UnifracMat) <- colnames(UnifracMat) <- colnames(x)
-    # Matrix-assign lower-triangle of UnifracMat. Then coerce to dist and
-    # return.
-    matIndices <- matIndices <- matrix(c(vapply(spn,"[",character(1),2L),
-                                         vapply(spn,"[",character(1),1L)),
-                                       ncol = 2)
-    UnifracMat[matIndices] <- unlist(distlist)
-    #
-    stats::as.dist(UnifracMat)
+    # Calculate unifrac. Use implementation from rbiom package
+    res <- unifrac(x, tree = tree, weighted = weighted)
+    return(res)
 }
 
 # Aggregate matrix based on nodeLabs. At the same time, rename rows based on nodeLab
 # --> each row represent specific node of tree
+#' @importFrom scuttle sumCountsAcrossFeatures
 .merge_assay_by_rows <- function(x, nodeLab, average = FALSE, ...){
     if( !.is_a_bool(average) ){
         stop("'average' must be TRUE or FALSE.", call. = FALSE)
     }
     # Merge assay based on nodeLabs
-    x <- scuttle::sumCountsAcrossFeatures(x, ids = nodeLab, 
-                                          subset.row = NULL, subset.col = NULL, 
-                                          average = average)
+    x <- sumCountsAcrossFeatures(
+        x, ids = nodeLab, subset.row = NULL, subset.col = NULL,
+        average = average)
     # Remove NAs from nodeLab
     nodeLab <- nodeLab[ !is.na(nodeLab) ]
     # Get the original order back
@@ -399,57 +270,6 @@ runUnifrac <- function(x, tree, weighted = FALSE, normalized = TRUE,
     return(x)
 }
 
-unifrac_unweighted <- function(i, tree, samplesums, edge_occ){
-    A  <- i[1]
-    B  <- i[2]
-    AT <- samplesums[A]
-    BT <- samplesums[B]
-    # Unweighted Unifrac
-    # Subset matrix to just columns A and B
-    edge_occ_AB <- edge_occ[, c(A, B)]
-    edge_occ_AB_rS <- rowSums(edge_occ_AB, na.rm = TRUE)
-    # Keep only the unique branches. Sum the lengths
-    edge_uni_AB_sum <- sum((tree$edge.length * edge_occ_AB)[edge_occ_AB_rS < 2,],
-                           na.rm=TRUE)
-    # Normalize this sum to the total branches among these two samples, A and
-    # B
-    uwUFpairdist <- edge_uni_AB_sum /
-        sum(tree$edge.length[edge_occ_AB_rS > 0])
-    uwUFpairdist
-}
-# if not-normalized weighted Unifrac, just return "numerator";
-# the u-value in the w-Unifrac description
-unifrac_weighted_not_norm <- function(i, tree, samplesums, edge_array){
-    A  <- i[1]
-    B  <- i[2]
-    AT <- samplesums[A]
-    BT <- samplesums[B]
-    # weighted Unifrac
-    wUF_branchweight <- abs(edge_array[, A]/AT - edge_array[, B]/BT)
-    # calculate the w-UF numerator
-    numerator <- sum((tree$edge.length * wUF_branchweight), na.rm = TRUE)
-    # if not-normalized weighted Unifrac, just return "numerator";
-    # the u-value in the w-Unifrac description
-    numerator
-}
-unifrac_weighted_norm <- function(i, mat, tree, samplesums, edge_array,
-                                  tipAges){
-    A  <- i[1]
-    B  <- i[2]
-    AT <- samplesums[A]
-    BT <- samplesums[B]
-    # weighted Unifrac
-    wUF_branchweight <- abs(edge_array[, A]/AT - edge_array[, B]/BT)
-    # calculate the w-UF numerator
-    numerator <- sum((tree$edge.length * wUF_branchweight), na.rm = TRUE)
-    # denominator (assumes tree-indices and matrix indices are same order)
-    denominator <- sum((tipAges * (mat[, A]/AT + mat[, B]/BT)), na.rm = TRUE)
-    # return the normalized weighted Unifrac values
-    numerator / denominator
-}
-
-################################################################################
-
 #' @importFrom ape is.rooted root
 .norm_tree_to_be_rooted <- function(tree, names){
     if( !is.rooted(tree) ){
diff --git a/man/calculateUnifrac.Rd b/man/calculateUnifrac.Rd
index 836958a1c..0ff762973 100644
--- a/man/calculateUnifrac.Rd
+++ b/man/calculateUnifrac.Rd
@@ -9,14 +9,7 @@
 \usage{
 calculateUnifrac(x, tree, ...)
 
-\S4method{calculateUnifrac}{ANY,phylo}(
-  x,
-  tree,
-  weighted = FALSE,
-  normalized = TRUE,
-  BPPARAM = SerialParam(),
-  ...
-)
+\S4method{calculateUnifrac}{ANY,phylo}(x, tree, weighted = FALSE, ...)
 
 \S4method{calculateUnifrac}{TreeSummarizedExperiment,missing}(
   x,
@@ -28,15 +21,7 @@ calculateUnifrac(x, tree, ...)
   ...
 )
 
-runUnifrac(
-  x,
-  tree,
-  weighted = FALSE,
-  normalized = TRUE,
-  nodeLab = NULL,
-  BPPARAM = SerialParam(),
-  ...
-)
+runUnifrac(x, tree, weighted = FALSE, nodeLab = NULL, ...)
 }
 \arguments{
 \item{x}{a numeric matrix or a
@@ -61,16 +46,6 @@ species/taxa shared between samples, whereas unweighted-Unifrac only
 considers presence/absence. Default is \code{FALSE}, meaning the
 unweighted-Unifrac distance is calculated for all pairs of samples.}
 
-\item{normalized}{\code{TRUE} or \code{FALSE}: Should the output be
-normalized such that values range from 0 to 1 independent of branch length
-values? Default is \code{TRUE}. Note that (unweighted) \code{Unifrac} is
-always normalized by total branch-length, and so this value is ignored when
-\code{weighted == FALSE}.}
-
-\item{BPPARAM}{A
-\code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}
-object specifying whether the Unifrac calculation should be parallelized.}
-
 \item{assay.type}{a single \code{character} value for specifying which
 assay to use for calculation.}
 
@@ -100,9 +75,9 @@ node labs must be present in \code{tree}.}
 a sample-by-sample distance matrix, suitable for NMDS, etc.
 }
 \description{
-This function calculates the (Fast) Unifrac distance for all sample-pairs
+This function calculates the Unifrac distance for all sample-pairs
 in a \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
-object.
+object. The function utilizes \code{\link[rbiom:unifrac]{rbiom:unifrac()}}.
 }
 \details{
 Please note that if \code{calculateUnifrac} is used as a \code{FUN} for
@@ -113,28 +88,18 @@ data(esophagus)
 library(scater)
 calculateUnifrac(esophagus, weighted = FALSE)
 calculateUnifrac(esophagus, weighted = TRUE)
-calculateUnifrac(esophagus, weighted = TRUE, normalized = FALSE)
 # for using calculateUnifrac in conjunction with runMDS the tree argument
 # has to be given separately. In addition, subsetting using ntop must
 # be disabled
 esophagus <- runMDS(esophagus, FUN = calculateUnifrac, name = "Unifrac",
                     tree = rowTree(esophagus),
-                    exprs_values = "counts",
+                    assay.type = "counts",
                     ntop = nrow(esophagus))
 reducedDim(esophagus)
 }
 \references{
 \url{http://bmf.colorado.edu/unifrac/}
 
-The main implementation (Fast Unifrac) is adapted from the algorithm's
-description in:
-
-Hamady, Lozupone, and Knight,
-``\href{http://www.nature.com/ismej/journal/v4/n1/full/ismej200997a.html}{Fast
-UniFrac:} facilitating high-throughput phylogenetic analyses of microbial
-communities including analysis of pyrosequencing and PhyloChip data.'' The
-ISME Journal (2010) 4, 17--27.
-
 See also additional descriptions of Unifrac in the following articles:
 
 Lozupone, Hamady and Knight, ``Unifrac - An Online Tool for Comparing
@@ -148,7 +113,3 @@ microbial communities.'' Appl Environ Microbiol. 2007
 Lozupone C, Knight R. ``Unifrac: a new phylogenetic method for comparing
 microbial communities.'' Appl Environ Microbiol. 2005 71 (12):8228-35.
 }
-\author{
-Paul J. McMurdie.
-Adapted for mia by Felix G.M. Ernst
-}
diff --git a/tests/testthat/test-5Unifrac.R b/tests/testthat/test-5Unifrac.R
index 6080e2f22..023842946 100644
--- a/tests/testthat/test-5Unifrac.R
+++ b/tests/testthat/test-5Unifrac.R
@@ -1,100 +1,85 @@
 context("Unifrac beta diversity")
 test_that("Unifrac beta diversity", {
-    skip_if_not(require("phyloseq", quietly = TRUE))
-    
     data(esophagus, package="mia")
     tse <- esophagus
     tse <- transformAssay(tse, assay.type="counts", method="relabundance")
     
     expect_error(
         calculateUnifrac(tse, assay.type = "test", tree_name = "phylo",
-                         weighted = FALSE, normalized = TRUE,
-                         BPPARAM = SerialParam())
+                         weighted = FALSE)
     )
     expect_error(
         calculateUnifrac(tse, assay.type = 2, tree_name = "phylo",
-                         weighted = FALSE, normalized = TRUE,
-                         BPPARAM = SerialParam())
+                         weighted = FALSE)
     )
     expect_error(
         calculateUnifrac(tse, assay.type = TRUE, tree_name = "phylo",
-                         weighted = FALSE, normalized = TRUE,
-                         BPPARAM = SerialParam())
+                         weighted = FALSE)
     )
     expect_error(
         calculateUnifrac(tse, assay.type = "counts", tree_name = "test",
-                         weighted = FALSE, normalized = TRUE,
-                         BPPARAM = SerialParam())
+                         weighted = FALSE)
     )
     expect_error(
         calculateUnifrac(tse, assay.type = "counts", tree_name = 1,
-                         weighted = FALSE, normalized = TRUE,
-                         BPPARAM = SerialParam())
+                         weighted = FALSE)
     )
     expect_error(
         calculateUnifrac(tse, assay.type = "counts", tree_name = TRUE,
-                         weighted = "FALSE", normalized = TRUE,
-                         BPPARAM = SerialParam())
-    )
-    expect_error(
-        calculateUnifrac(tse, assay.type = "counts", tree_name = "phylo",
-                         weighted = 1, normalized = TRUE,
-                         BPPARAM = SerialParam())
-    )
-    expect_error(
-        calculateUnifrac(tse, assay.type = "counts", tree_name = "phylo",
-                         weighted = FALSE, normalized = "TRUE",
-                         BPPARAM = SerialParam())
+                         weighted = "FALSE")
     )
     expect_error(
         calculateUnifrac(tse, assay.type = "counts", tree_name = "phylo",
-                         weighted = FALSE, normalized = 1,
-                         BPPARAM = SerialParam())
+                         weighted = 1)
     )
     
     data(GlobalPatterns, package="mia")
     tse <- GlobalPatterns
-    # Compare to phyloseq function
-    .require_package("phyloseq")
-    # Convert data into phyloseq
-    pseq <- makePhyloseqFromTreeSE(tse)
-    # Calculate unifrac
-    unifrac_tse <- as.matrix(calculateUnifrac(tse))
-    unifrac_pseq <- as.matrix(phyloseq::UniFrac(pseq))
-    expect_equal(unifrac_tse, unifrac_pseq)
-    # Calculate unifrac
-    unifrac_tse <- as.matrix(calculateUnifrac(tse, weighted = TRUE, normalized = FALSE))
-    unifrac_pseq <- as.matrix(phyloseq::UniFrac(pseq, weighted = TRUE, normalized = FALSE))
-    expect_equal(unifrac_tse, unifrac_pseq)
+    # Calculate unweighted unifrac
+    unifrac_mia <- as.matrix(calculateUnifrac(tse, weighted = FALSE))
+    unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse), weighted = FALSE,
+                                              rowTree(tse)))
+    expect_equal(unifrac_mia, unifrac_rbiom)
+    # Calculate weighted unifrac. Allow tolerance since weighted unifrac
+    # calculation in rbiom has some stochasticity. That is most likely due
+    # multithreading and complex structure of tree (loops).
+    unifrac_mia <- as.matrix(calculateUnifrac(tse, weighted = TRUE))
+    unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse), weighted = TRUE,
+                                              rowTree(tse)))
+    expect_equal(unifrac_mia, unifrac_rbiom, tolerance = 1e-4)
     
-    # Test with merged object with multiple trees
+    # Test with merged object with multiple trees. runUnifrac takes subset of
+    # data based on provided tree.
     tse <- mergeSEs(GlobalPatterns, esophagus, assay.type="counts", missing_values = 0)
+    tse_ref <- tse
+    tse_ref <- tse_ref[ rowLinks(tse_ref)[["whichTree"]] == "phylo", ]
+    # Calculate unweighted unifrac
+    unifrac_mia <- as.matrix(calculateUnifrac(tse, weighted = FALSE))
+    unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse_ref), weighted = FALSE,
+                                              rowTree(tse_ref)))
+    expect_equal(unifrac_mia, unifrac_rbiom)
+    # Calculate weighted unifrac
+    unifrac_mia <- as.matrix(calculateUnifrac(tse, weighted = TRUE))
+    unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse_ref), weighted = TRUE,
+                                              rowTree(tse_ref)))
+    expect_equal(unifrac_mia, unifrac_rbiom, tolerance = 1e-4)
     
-    # Convert data into phyloseq
-    pseq <- makePhyloseqFromTreeSE(tse, assay.type="counts")
-    # Convert back to TreeSE (pseq has pruned tree)
-    tse <- makeTreeSEFromPhyloseq(pseq)
-    # Calculate unifrac
-    unifrac_tse <- as.matrix(calculateUnifrac(tse))
-    unifrac_pseq <- as.matrix(phyloseq::UniFrac(pseq))
-    expect_equal(unifrac_tse, unifrac_pseq)
-    # Calculate unifrac
-    unifrac_tse <- as.matrix(calculateUnifrac(tse, weighted = TRUE, normalized = FALSE, 
-                                              tree_name = "phylo"))
-    unifrac_pseq <- as.matrix(phyloseq::UniFrac(pseq, weighted = TRUE, normalized = FALSE))
-    expect_equal(unifrac_tse, unifrac_pseq)
-    
-    # Test the function with agglomerated data
-    tse <- agglomerateByRank(tse, rank = "Phylum")
-    # Convert data into phyloseq
-    suppressWarnings( pseq <- makePhyloseqFromTreeSE(tse) )
-    # Convert back to TreeSE (pseq has pruned tree)
-    tse <- makeTreeSEFromPhyloseq(pseq)
-    # Calculate unifrac
-    unifrac_tse <- as.matrix(calculateUnifrac(tse, normalized = TRUE))
-    unifrac_pseq <- as.matrix(phyloseq::UniFrac(pseq, normalized = TRUE))
-    expect_equal(unifrac_tse, unifrac_pseq)
-    
-    # Detach phyloseq package
-    unloadNamespace("phyloseq")
+    # Test the function with agglomerated data. runUnifrac renames rownames
+    # based on tips and links to them. Then it also prunes the tree so that
+    # rows are in tips.
+    tse <- GlobalPatterns
+    tse <- agglomerateByRank(tse, rank = "Species")
+    tse_ref <- tse
+    rownames(tse_ref) <- rowLinks(tse_ref)[["nodeLab"]]
+    rowTree(tse_ref) <- .prune_tree(rowTree(tse_ref), rowLinks(tse_ref)[["nodeLab"]])
+    # Calculate unweighted unifrac
+    unifrac_mia <- as.matrix(calculateUnifrac(tse, weighted = FALSE))
+    unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse_ref), weighted = FALSE,
+                                              rowTree(tse_ref)))
+    # Calculate weighted unifrac. No tolerance needed since the tree has
+    # simpler structure after pruning.
+    unifrac_mia <- as.matrix(calculateUnifrac(tse, weighted = TRUE))
+    unifrac_rbiom <- as.matrix(rbiom::unifrac(assay(tse_ref), weighted = TRUE,
+                                              rowTree(tse_ref)))
+    expect_equal(unifrac_mia, unifrac_rbiom)
 })