diff --git a/DESCRIPTION b/DESCRIPTION index 982ec444c..e66049313 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mia Type: Package -Version: 1.13.22 +Version: 1.13.23 Authors@R: c(person(given = "Felix G.M.", family = "Ernst", role = c("aut"), email = "felix.gm.ernst@outlook.com", diff --git a/NEWS b/NEWS index 23badbede..13f930df9 100644 --- a/NEWS +++ b/NEWS @@ -138,3 +138,4 @@ Changes in version 1.13.x calculateCCA to getCCA + add informative error message in rarefyAssay on assays with strictly-negative values + Use rbiom package in unifrac implementation ++ Updated parameter names to follow naming convention "parameter.name" diff --git a/R/agglomerate.R b/R/agglomerate.R index 81cef8fc8..7c06b1b55 100644 --- a/R/agglomerate.R +++ b/R/agglomerate.R @@ -32,11 +32,13 @@ #' regarded as empty. (Default: \code{c(NA, "", " ", "\t")}). They will be #' removed if \code{na.rm = TRUE} before agglomeration. #' -#' @param agglomerate.tree \code{TRUE} or \code{FALSE}: should +#' @param update.tree \code{TRUE} or \code{FALSE}: should #' \code{rowTree()} also be agglomerated? (Default: -#' \code{agglomerate.tree = FALSE}) +#' \code{update.tree = FALSE}) #' -#' @param agglomerateTree alias for \code{agglomerate.tree}. +#' @param agglomerateTree Deprecated. Use \code{update.tree} instead. +#' +#' @param agglomerate.tree Deprecated. Use \code{update.tree} instead. #' #' @param ... arguments passed to \code{agglomerateByRank} function for #' \code{SummarizedExperiment} objects, @@ -45,26 +47,26 @@ #' to \code{getPrevalence} and \code{getPrevalentTaxa} and used in #' \code{agglomeratebyPrevalence} #' \itemize{ -#' \item \code{remove_empty_ranks}: A single boolean value for selecting +#' \item \code{empty.ranks.rm}: A single boolean value for selecting #' whether to remove those columns of rowData that include only NAs after -#' agglomeration. 
(By default: \code{remove_empty_ranks = FALSE}) -#' \item \code{make_unique}: A single boolean value for selecting -#' whether to make rownames unique. (By default: \code{make_unique = TRUE}) +#' agglomeration. (By default: \code{empty.ranks.rm = FALSE}) +#' \item \code{make.unique}: A single boolean value for selecting +#' whether to make rownames unique. (By default: \code{make.unique = TRUE}) #' \item \code{detection}: Detection threshold for absence/presence. #' Either an absolute value compared directly to the values of \code{x} -#' or a relative value between 0 and 1, if \code{as_relative = FALSE}. +#' or a relative value between 0 and 1, if \code{as.relative = FALSE}. #' \item \code{prevalence}: Prevalence threshold (in 0 to 1). The #' required prevalence is strictly greater by default. To include the -#' limit, set \code{include_lowest} to \code{TRUE}. +#' limit, set \code{include.lowest} to \code{TRUE}. #' \item \code{as.relative}: Logical scalar: Should the detection #' threshold be applied on compositional (relative) abundances? #' (default: \code{FALSE}) -#' \item \code{mergeRefSeq} \code{TRUE} or \code{FALSE}: Should a +#' \item \code{update.refseq} \code{TRUE} or \code{FALSE}: Should a #' consensus sequence be calculated? If set to \code{FALSE}, the result #' from \code{archetype} is returned; If set to \code{TRUE} the result #' from #' \code{\link[DECIPHER:ConsensusSequence]{DECIPHER::ConsensusSequence}} -#' is returned. (Default: \code{mergeRefSeq = FALSE}) +#' is returned. (Default: \code{update.refseq = FALSE}) #' \item \code{archetype} Of each level of \code{f}, which element should #' be regarded as the archetype and metadata in the columns or rows kept, #' while merging? This can be single integer value or an integer vector @@ -76,10 +78,12 @@ #' @param altexp String or integer scalar specifying an alternative experiment #' containing the input data. 
#' -#' @param strip_altexp \code{TRUE} or \code{FALSE}: Should alternative +#' @param altexp.rm \code{TRUE} or \code{FALSE}: Should alternative #' experiments be removed prior to agglomeration? This prevents to many #' nested alternative experiments by default (default: -#' \code{strip_altexp = TRUE}) +#' \code{altexp.rm = TRUE}) +#' +#' @param strip_altexp Deprecated. Use \code{altexp.rm} instead. #' #' @param MARGIN A character value for selecting if data is merged #' row-wise / for features ('rows') or column-wise / for samples ('cols'). @@ -90,8 +94,10 @@ #' merged. If \code{length(levels(f)) == nrow(x)/ncol(x)}, \code{x} will be #' returned unchanged. #' -#' @param mergeTree \code{TRUE} or \code{FALSE}: Should -#' \code{rowTree()} also be merged? (Default: \code{mergeTree = FALSE}) +#' @param update.tree \code{TRUE} or \code{FALSE}: Should +#' \code{rowTree()} also be merged? (Default: \code{update.tree = FALSE}) +#' +#' @param mergeTree Deprecated. Use \code{update.tree} instead. #' #' @details #' @@ -144,7 +150,7 @@ #' #' # agglomerate the tree as well #' x2 <- agglomerateByRank(GlobalPatterns, rank="Family", -#' agglomerate.tree = TRUE) +#' update.tree = TRUE) #' nrow(x2) # same number of rows, but #' rowTree(x1) # ... different #' rowTree(x2) # ... tree @@ -166,12 +172,12 @@ #' print(rownames(x3[1:3,])) #' #' # To add them, use getTaxonomyLabels function. 
-#' rownames(x3) <- getTaxonomyLabels(x3, with_rank = TRUE) +#' rownames(x3) <- getTaxonomyLabels(x3, with.rank = TRUE) #' print(rownames(x3[1:3,])) #' -#' # use 'remove_empty_ranks' to remove columns that include only NAs +#' # use 'empty.ranks.rm' to remove columns that include only NAs #' x4 <- agglomerateByRank(GlobalPatterns, rank="Phylum", -#' remove_empty_ranks = TRUE) +#' empty.ranks.rm = TRUE) #' head(rowData(x4)) #' #' # If the assay contains NAs, you might want to consider replacing them, @@ -201,7 +207,7 @@ #' f <- factor(regmatches(rownames(esophagus), #' regexpr("^[0-9]*_[0-9]*",rownames(esophagus)))) #' merged <- agglomerateByVariable(esophagus, MARGIN = "rows", f, -#' mergeTree = TRUE) +#' update.tree = TRUE) #' plot(rowTree(merged)) #' # #' data(GlobalPatterns) @@ -258,7 +264,7 @@ setMethod("agglomerateByRank", signature = c(x = "SummarizedExperiment"), tax_cols <- .get_tax_cols_from_se(x) # if na.rm is TRUE, remove the empty, white-space, NA values from - # tree will be pruned later, if agglomerate.tree = TRUE + # tree will be pruned later, if update.tree = TRUE if( na.rm ){ x <- .remove_with_empty_taxonomic_info(x, tax_cols[col], empty.fields) @@ -293,8 +299,8 @@ setMethod("agglomerateByRank", signature = c(x = "SummarizedExperiment"), } # adjust rownames rownames(x) <- getTaxonomyLabels(x, empty.fields, ..., - with_rank = FALSE, - resolve_loops = FALSE) + with.rank = FALSE, + resolve.loops = FALSE) # Remove those columns from rowData that include only NAs x <- .remove_NA_cols_from_rowdata(x, ...) 
x <- .add_values_to_metadata(x, "agglomerated_by_rank", rank) @@ -321,13 +327,13 @@ setMethod("agglomerateByVariable", signature = c(x = "SummarizedExperiment"), #' @export setMethod("agglomerateByVariable", signature = c(x = "TreeSummarizedExperiment"), - function(x, MARGIN, f, mergeTree = FALSE, ...){ + function(x, MARGIN, f, update.tree = mergeTree, mergeTree = FALSE, ...){ # Check MARGIN MARGIN <- .check_MARGIN(MARGIN) # Get function based on MARGIN FUN <- switch(MARGIN, .merge_rows_TSE, .merge_cols_TSE) # Agglomerate - x <- FUN(x, f, mergeTree = mergeTree, ...) + x <- FUN(x, f, update.tree = update.tree, ...) return(x) } ) @@ -336,16 +342,16 @@ setMethod("agglomerateByVariable", #' @importFrom SingleCellExperiment altExp altExp<- altExps<- #' @export setMethod("agglomerateByRank", signature = c(x = "SingleCellExperiment"), - function(x, ..., altexp = NULL, strip_altexp = TRUE){ + function(x, ..., altexp = NULL, altexp.rm = strip_altexp, strip_altexp = TRUE){ # input check - if(!.is_a_bool(strip_altexp)){ - stop("'strip_altexp' mus be TRUE or FALSE.", call. = FALSE) + if(!.is_a_bool(altexp.rm)){ + stop("'altexp.rm' mus be TRUE or FALSE.", call. = FALSE) } # if (!is.null(altexp)) { x <- altExp(x, altexp) } - if(strip_altexp && is(x, "SingleCellExperiment")){ + if(altexp.rm && is(x, "SingleCellExperiment")){ altExps(x) <- NULL } callNextMethod(x, ...) @@ -357,10 +363,11 @@ setMethod("agglomerateByRank", signature = c(x = "SingleCellExperiment"), setMethod( "agglomerateByRank", signature = c(x = "TreeSummarizedExperiment"), function( - x, ..., agglomerate.tree = agglomerateTree, agglomerateTree = FALSE){ + x, ..., update.tree = agglomerate.tree, agglomerate.tree = agglomerateTree, + agglomerateTree = FALSE){ # input check - if(!.is_a_bool(agglomerate.tree)){ - stop("'agglomerate.tree' must be TRUE or FALSE.", + if(!.is_a_bool(update.tree)){ + stop("'update.tree' must be TRUE or FALSE.", call. 
= FALSE) } # If there are multipe rowTrees, it might be that multiple @@ -372,7 +379,7 @@ setMethod( x <- .order_based_on_trees(x) } # Agglomerate data - x <- callNextMethod(x, mergeTree = agglomerate.tree, ...) + x <- callNextMethod(x, update.tree = update.tree, ...) return(x) } ) @@ -392,14 +399,15 @@ setMethod( # This function removes empty rank columns from rowdata. (Those that include # only NA values) -.remove_NA_cols_from_rowdata <- function(x, remove_empty_ranks = FALSE, ...){ - # Check remove_empty_ranks - if( !.is_a_bool(remove_empty_ranks) ){ - stop("'remove_empty_ranks' must be a boolean value.", +.remove_NA_cols_from_rowdata <- function(x, empty.ranks.rm = remove_empty_ranks, + remove_empty_ranks = FALSE, ...){ + # Check empty.ranks.rm + if( !.is_a_bool(empty.ranks.rm) ){ + stop("'empty.ranks.rm' must be a boolean value.", call. = FALSE) } # If user wants to remove those columns - if( remove_empty_ranks ){ + if( empty.ranks.rm ){ # Get columns that include taxonomy information rank_cols <- taxonomyRanks(x) # Get rowData with only taxonomy diff --git a/R/calculateUnifrac.R b/R/calculateUnifrac.R index 322c12fee..fa432d9fb 100644 --- a/R/calculateUnifrac.R +++ b/R/calculateUnifrac.R @@ -21,10 +21,12 @@ #' matrix. This means that the phylo object and the columns should relate #' to the same type of features (aka. microorganisms). #' -#' @param nodeLab if \code{x} is a matrix, +#' @param node.label if \code{x} is a matrix, #' a \code{character} vector specifying links between rows/columns and tips of \code{tree}. #' The length must equal the number of rows/columns of \code{x}. Furthermore, all the #' node labs must be present in \code{tree}. +#' +#' @param nodeLab Deprecated. Use \code{node.label} instead. #' #' @param assay.type a single \code{character} value for specifying which #' assay to use for calculation. @@ -38,9 +40,11 @@ #' (Please use \code{assay.type} instead. At some point \code{assay_name} #' will be disabled.) 
#' -#' @param tree_name a single \code{character} value for specifying which +#' @param tree.name a single \code{character} value for specifying which #' tree will be used in calculation. -#' (By default: \code{tree_name = "phylo"}) +#' (By default: \code{tree.name = "phylo"}) +#' +#' @param tree_name Deprecated. Use \code{tree.name} instead. #' #' @param weighted \code{TRUE} or \code{FALSE}: Should use weighted-Unifrac #' calculation? Weighted-Unifrac takes into account the relative abundance of @@ -121,19 +125,19 @@ setMethod("calculateUnifrac", signature = c(x = "TreeSummarizedExperiment", tree = "missing"), function(x, assay.type = assay_name, assay_name = exprs_values, exprs_values = "counts", - tree_name = "phylo", transposed = FALSE, ...){ + tree.name = tree_name, tree_name = "phylo", transposed = FALSE, ...){ # Check assay.type and get assay .check_assay_present(assay.type, x) mat <- assay(x, assay.type) if(!transposed){ - # Check tree_name - .check_rowTree_present(tree_name, x) + # Check tree.name + .check_rowTree_present(tree.name, x) # Get tree - tree <- rowTree(x, tree_name) + tree <- rowTree(x, tree.name) # Select only those features that are in the rowTree - whichTree <- rowLinks(x)[, "whichTree"] == tree_name + whichTree <- rowLinks(x)[, "whichTree"] == tree.name if( any(!whichTree) ){ - warning("Not all rows were present in the rowTree specified by 'tree_name'.", + warning("Not all rows were present in the rowTree specified by 'tree.name'.", "'x' is subsetted.", call. 
= FALSE) # Subset the data x <- x[ whichTree, ] @@ -144,14 +148,14 @@ setMethod("calculateUnifrac", # Get links links <- rowLinks(x) } else { - # Check tree_name - .check_colTree_present(tree_name, x) + # Check tree.name + .check_colTree_present(tree.name, x) # Get tree - tree <- colTree(x, tree_name) + tree <- colTree(x, tree.name) # Select only those samples that are in the colTree - whichTree <- colLinks(x)[, "whichTree"] == tree_name + whichTree <- colLinks(x)[, "whichTree"] == tree.name if( any(!whichTree) ){ - warning("Not all columns were present in the colTree specified by 'tree_name'.", + warning("Not all columns were present in the colTree specified by 'tree.name'.", "'x' is subsetted.", call. = FALSE) # Subset the data x <- x[ , whichTree ] @@ -162,10 +166,10 @@ setMethod("calculateUnifrac", links <- colLinks(x) } # Remove those links (make them NA) that are not included in this tree - links[ links$whichTree != tree_name, ] <- NA + links[ links$whichTree != tree.name, ] <- NA # Take only nodeLabs links <- links[ , "nodeLab" ] - res <- calculateUnifrac(mat, tree = tree, nodeLab = links, ...) + res <- calculateUnifrac(mat, tree = tree, node.label = links, ...) return(res) } ) @@ -177,7 +181,7 @@ setMethod("calculateUnifrac", #' @importFrom rbiom unifrac #' @export runUnifrac <- function( - x, tree, weighted = FALSE, nodeLab = NULL, ...){ + x, tree, weighted = FALSE, node.label = nodeLab, nodeLab = NULL, ...){ # Check x if( !is.matrix(as.matrix(x)) ){ stop("'x' must be a matrix", call. = FALSE) @@ -196,47 +200,47 @@ runUnifrac <- function( if(is.null(colnames(x)) || is.null(rownames(x))){ stop("colnames and rownames must not be NULL", call. 
= FALSE) } - # nodeLab should be NULL or character vector specifying links between + # node.label should be NULL or character vector specifying links between # rows and tree labels - if( !(is.null(nodeLab) || - (is.character(nodeLab) && length(nodeLab) == nrow(x) && - all(nodeLab[ !is.na(nodeLab) ] %in% c(tree$tip.label)))) ){ + if( !(is.null(node.label) || + (is.character(node.label) && length(node.label) == nrow(x) && + all(node.label[ !is.na(node.label) ] %in% c(tree$tip.label)))) ){ stop( - "'nodeLab' must be NULL or character specifying links between ", + "'node.label' must be NULL or character specifying links between ", "abundance table and tree labels.", call. = FALSE) } # check that matrix and tree are compatible - if( is.null(nodeLab) && !all(rownames(x) %in% c(tree$tip.label)) ) { + if( is.null(node.label) && !all(rownames(x) %in% c(tree$tip.label)) ) { stop( "Incompatible tree and abundance table! Please try to provide ", - "'nodeLab'.", call. = FALSE) + "'node.label'.", call. = FALSE) } # Merge rows, so that rows that are assigned to same tree node are agglomerated # together. If nodeLabs were provided, merge based on those. Otherwise merge # based on rownames - if( is.null(nodeLab) ){ - nodeLab <- rownames(x) + if( is.null(node.label) ){ + node.label <- rownames(x) } # Prune tree if there are nodes that cannot be found from tips or if there # are tips that cannot be found from abundance matrix. It might be # that certain row is linked to internal node or that the tree has extra # tips that do not match with rows (e.g. after subsetting). - if( any( !nodeLab %in% tree$tip.label ) || - any( !tree$tip.label %in% nodeLab) ){ - tree <- .prune_tree(tree, nodeLab) + if( any( !node.label %in% tree$tip.label ) || + any( !tree$tip.label %in% node.label) ){ + tree <- .prune_tree(tree, node.label) warning("Pruning tree...", call. = FALSE) } # If node labels cannot be found from tips even after pruning, give error. 
# This kind of tree cannot be used in unifrac since it expects that every # row is linked to tips. - if( any( !nodeLab %in% tree$tip.label ) ){ + if( any( !node.label %in% tree$tip.label ) ){ stop( "Unifrac cannot be calculated since tree is not compatible. ", "Each row must be linked to tip of the tree.", call. = FALSE) } # Merge assay so that each row represent single tip. It might be that # multiple rows are linked to single tip. - x <- .merge_assay_by_rows(x, nodeLab, ...) + x <- .merge_assay_by_rows(x, node.label, ...) # Modify tree so that it will become rooted. tree <- .norm_tree_to_be_rooted(tree, rownames(x)) # Remove those tips that are not present in the data @@ -252,21 +256,21 @@ runUnifrac <- function( return(res) } -# Aggregate matrix based on nodeLabs. At the same time, rename rows based on nodeLab +# Aggregate matrix based on nodeLabs. At the same time, rename rows based on node.label # --> each row represent specific node of tree #' @importFrom scuttle sumCountsAcrossFeatures -.merge_assay_by_rows <- function(x, nodeLab, average = FALSE, ...){ +.merge_assay_by_rows <- function(x, node.label, average = FALSE, ...){ if( !.is_a_bool(average) ){ stop("'average' must be TRUE or FALSE.", call. = FALSE) } # Merge assay based on nodeLabs x <- sumCountsAcrossFeatures( - x, ids = nodeLab, subset.row = NULL, subset.col = NULL, + x, ids = node.label, subset.row = NULL, subset.col = NULL, average = average) - # Remove NAs from nodeLab - nodeLab <- nodeLab[ !is.na(nodeLab) ] + # Remove NAs from node.label + node.label <- node.label[ !is.na(node.label) ] # Get the original order back - x <- x[ nodeLab, ] + x <- x[ node.label, ] return(x) } diff --git a/R/deprecate.R b/R/deprecate.R index 52f8852b5..520620661 100644 --- a/R/deprecate.R +++ b/R/deprecate.R @@ -275,7 +275,7 @@ setMethod("testExperimentCrossAssociation", signature = c(x = "ANY"), .Deprecated(msg = paste0("'testExperimentCrossAssociation' is ", "deprecated. 
Use ", "'getCrossAssociation' instead.")) - getCrossAssociation(x, test_significance = TRUE, ...) + getCrossAssociation(x, test.signif = TRUE, ...) } ) @@ -292,7 +292,7 @@ setMethod("testExperimentCrossCorrelation", signature = c(x = "ANY"), .Deprecated(msg = paste0("'testExperimentCrossCorrelation' is ", "deprecated. Use ", "'getCrossAssociation' instead.")) - getCrossAssociation(x, test_significance = TRUE, ...) + getCrossAssociation(x, test.signif = TRUE, ...) } ) diff --git a/R/estimateDiversity.R b/R/estimateDiversity.R index dcc2493fb..1eb8dff07 100644 --- a/R/estimateDiversity.R +++ b/R/estimateDiversity.R @@ -31,14 +31,18 @@ #' stored in. By default this will use the original names of the calculated #' indices. #' -#' @param tree_name a single \code{character} value for specifying which +#' @param tree.name a single \code{character} value for specifying which #' rowTree will be used to calculate faith index. -#' (By default: \code{tree_name = "phylo"}) +#' (By default: \code{tree.name = "phylo"}) +#' +#' @param tree_name Deprecated. Use \code{tree.name} instead. #' -#' @param node_lab NULL or a character vector specifying the links between rows and +#' @param node.label NULL or a character vector specifying the links between rows and #' node labels of \code{tree}. If a certain row is not linked with the tree, missing #' instance should be noted as NA. When NULL, all the rownames should be found from -#' the tree. (By default: \code{node_lab = NULL}) +#' the tree. (By default: \code{node.label = NULL}) +#' +#' @param node_lab Deprecated. Use \code{node.label} instead. #' #' @param BPPARAM A #' \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}} @@ -53,9 +57,10 @@ #' this quantile of the data. The assumption is that abundances higher than #' this are not common, and they are classified in their own group. #' By default, \code{quantile} is 0.5. 
-#' \item num_of_classes: The number of arithmetic abundance classes +#' \item nclasses: The number of arithmetic abundance classes #' from zero to the quantile cutoff indicated by \code{quantile}. -#' By default, \code{num_of_classes} is 50. +#' By default, \code{nclasses} is 50. +#' \item num_of_classes Deprecated. Use \code{nclasses} instead. #' \item only.tips: A boolean value specifying whether to remove internal #' nodes when Faith's index is calculated. When \code{only.tips=TRUE}, those #' rows that are not tips of tree are removed. @@ -193,10 +198,10 @@ #' #' # 'threshold' can be used to determine threshold for 'coverage' index #' tse <- estimateDiversity(tse, index = "coverage", threshold = 0.75) -#' # 'quantile' and 'num_of_classes' can be used when +#' # 'quantile' and 'nclasses' can be used when #' # 'log_modulo_skewness' is calculated #' tse <- estimateDiversity(tse, index = "log_modulo_skewness", -#' quantile = 0.75, num_of_classes = 100) +#' quantile = 0.75, nclasses = 100) #' #' # It is recommended to specify also the final names used in the output. #' tse <- estimateDiversity(tse, @@ -285,7 +290,7 @@ setMethod("estimateDiversity", signature = c(x="TreeSummarizedExperiment"), function(x, assay.type = "counts", assay_name = NULL, index = c("coverage", "faith", "fisher", "gini_simpson", "inverse_simpson", "log_modulo_skewness", "shannon"), - name = index, tree_name = "phylo", + name = index, tree.name = tree_name, tree_name = "phylo", ..., BPPARAM = SerialParam()){ # input check supported_index <- c("coverage", "fisher", "gini_simpson", "faith", @@ -295,9 +300,9 @@ setMethod("estimateDiversity", signature = c(x="TreeSummarizedExperiment"), stop(paste("'index' must be from the following options: '", index_string), call. 
= FALSE) } - # Check tree_name - if( !.is_non_empty_string(tree_name) ){ - stop("'tree_name' must be a character specifying a rowTree of 'x'.", + # Check tree.name + if( !.is_non_empty_string(tree.name) ){ + stop("'tree.name' must be a character specifying a rowTree of 'x'.", call. = FALSE) } if (!is.null(assay_name)) { @@ -338,7 +343,7 @@ setMethod("estimateDiversity", signature = c(x="TreeSummarizedExperiment"), # If 'faith' was one of the indices, 'calc_faith' is TRUE if( calc_faith ){ # Get tree to check whether faith can be calculated - tree <- rowTree(x, tree_name) + tree <- rowTree(x, tree.name) # Check if faith can be calculated. Give warning and do not run estimateFaith # if there is no rowTree and other indices were also calculated. Otherwise, # run estimateFaith. (If there is no rowTree --> error) @@ -348,13 +353,13 @@ setMethod("estimateDiversity", signature = c(x="TreeSummarizedExperiment"), "since it cannot be calculated without rowTree. ", "This requires a rowTree in the input argument x. ", "Make sure that 'rowTree(x)' is not empty, or ", - "make sure to specify 'tree_name' in the input ", + "make sure to specify 'tree.name' in the input ", "arguments. Warning is also provided if the tree does ", "not have any branches. You can consider adding ", "rowTree to include this index.", call. = FALSE) } else { - x <- estimateFaith(x, name = faith_name, tree_name = tree_name, ...) + x <- estimateFaith(x, name = faith_name, tree.name = tree.name, ...) 
# Ensure that indices are in correct order colnames <- colnames(colData(x)) colnames <- c(colnames[ !colnames %in% name_original ], name_original) @@ -377,7 +382,7 @@ setGeneric("estimateFaith",signature = c("x", "tree"), #' @export setMethod("estimateFaith", signature = c(x="SummarizedExperiment", tree="phylo"), function(x, tree, assay.type = "counts", assay_name = NULL, - name = "faith", node_lab = NULL, ...){ + name = "faith", node.label = node_lab, node_lab = NULL, ...){ # Input check # Check 'tree' # IF there is no rowTree gives an error @@ -398,11 +403,11 @@ setMethod("estimateFaith", signature = c(x="SummarizedExperiment", tree="phylo") stop("'name' must be a non-empty character value.", call. = FALSE) } - # Check that node_lab is NULL or it specifies links between rownames and + # Check that node.label is NULL or it specifies links between rownames and # node labs - if( !( is.null(node_lab) || - is.character(node_lab) && length(node_lab) == nrow(x) ) ){ - stop("'node_lab' must be NULL or a vector specifying links between ", + if( !( is.null(node.label) || + is.character(node.label) && length(node.label) == nrow(x) ) ){ + stop("'node.label' must be NULL or a vector specifying links between ", "rownames and node labs of 'tree'.", call. = FALSE) } @@ -414,12 +419,12 @@ setMethod("estimateFaith", signature = c(x="SummarizedExperiment", tree="phylo") call. 
= FALSE) } # Subset and rename rows of the assay to correspond node_labs - if( !is.null(node_lab) ){ + if( !is.null(node.label) ){ # Subset - mat <- mat[ !is.na(node_lab), ] - node_lab <- node_lab[ !is.na(node_lab) ] + mat <- mat[ !is.na(node.label), ] + node.label <- node.label[ !is.na(node.label) ] # Rename - rownames(mat) <- node_lab + rownames(mat) <- node.label } # Calculates Faith index faith <- list(.calc_faith(mat, tree, ...)) @@ -432,31 +437,31 @@ setMethod("estimateFaith", signature = c(x="SummarizedExperiment", tree="phylo") #' @export setMethod("estimateFaith", signature = c(x="TreeSummarizedExperiment", tree="missing"), function(x, assay.type = "counts", assay_name = NULL, - name = "faith", tree_name = "phylo", ...){ - # Check tree_name - if( !.is_non_empty_character(tree_name) ){ - stop("'tree_name' must be a character specifying a rowTree of 'x'.", + name = "faith", tree.name = tree_name, tree_name = "phylo", ...){ + # Check tree.name + if( !.is_non_empty_character(tree.name) ){ + stop("'tree.name' must be a character specifying a rowTree of 'x'.", call. = FALSE) } # Gets the tree - tree <- rowTree(x, tree_name) + tree <- rowTree(x, tree.name) if( is.null(tree) || is.null(tree$edge.length)){ - stop("rowTree(x, tree_name) is NULL or the tree does not have any branches. ", + stop("rowTree(x, tree.name) is NULL or the tree does not have any branches. ", "The Faith's alpha diversity index cannot be calculated.", call. = FALSE) } # Get node labs node_lab <- rowLinks(x)[ , "nodeLab" ] - node_lab[ rowLinks(x)[, "whichTree"] != tree_name ] <- NA + node_lab[ rowLinks(x)[, "whichTree"] != tree.name ] <- NA # Give a warning, data will be subsetted if( any(is.na(node_lab)) ){ - warning("The rowTree named 'tree_name' does not include all the ", + warning("The rowTree named 'tree.name' does not include all the ", "rows which is why 'x' is subsetted when the Faith's alpha ", "diversity index is calculated.", call. 
= FALSE) } # Calculates the Faith index - estimateFaith(x, tree, name = name, node_lab = node_lab, ...) + estimateFaith(x, tree, name = name, node.label = node_lab, ...) } ) @@ -586,22 +591,23 @@ setMethod("estimateFaith", signature = c(x="TreeSummarizedExperiment", tree="mis return(faiths) } -.calc_log_modulo_skewness <- function(mat, quantile = 0.5, num_of_classes = 50, ...){ +.calc_log_modulo_skewness <- function(mat, quantile = 0.5, + nclasses = num_of_classes, num_of_classes = 50, ...){ # quantile must be a numeric value between 0-1 if( !( is.numeric(quantile) && (quantile >= 0 && quantile <= 1) ) ){ stop("'quantile' must be a numeric value between 0-1.", call. = FALSE) } - # num_of_classes must be a positive numeric value - if( !( is.numeric(num_of_classes) && num_of_classes > 0 ) ){ - stop("'num_of_classes' must be a positive numeric value.", + # nclasses must be a positive numeric value + if( !( is.numeric(nclasses) && nclasses > 0 ) ){ + stop("'nclasses' must be a positive numeric value.", call. = FALSE) } # Determine the quantile point. quantile_point <- quantile(max(mat), quantile) # Tabulate the arithmetic abundance classes. Use the same classes # for all samples for consistency - cutpoints <- c(seq(0, quantile_point, length=num_of_classes), Inf) + cutpoints <- c(seq(0, quantile_point, length=nclasses), Inf) # Calculates sample-wise frequencies. How many taxa in each interval? freq_table <- table(cut(mat, cutpoints), col(mat)) # Calculates the skewness of frequency table. 
Returns skewness for each diff --git a/R/estimateDominance.R b/R/estimateDominance.R index 7160a7ae0..108441fa0 100644 --- a/R/estimateDominance.R +++ b/R/estimateDominance.R @@ -294,7 +294,7 @@ setMethod("estimateDominance", signature = c(x = "SummarizedExperiment"), } .calc_core_dominance <- function(mat, ...){ - getPrevalentAbundance(mat, detection = 0, as_relative = TRUE) + getPrevalentAbundance(mat, detection = 0, as.relative = TRUE) } .calc_dominance <- function(mat, ntaxa, aggregate, index){ diff --git a/R/getCrossAssociation.R b/R/getCrossAssociation.R index 9f2ccc3be..156088b51 100644 --- a/R/getCrossAssociation.R +++ b/R/getCrossAssociation.R @@ -23,16 +23,10 @@ #' @param assay.type2 A single character value for selecting the #' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{assay}} of #' experiment 2 to be transformed. (By default: \code{assay.type2 = "counts"}) -#' -#' @param assay_name1 a single \code{character} value for specifying which -#' assay of experiment 1 to use for calculation. -#' (Please use \code{assay.type1} instead. At some point \code{assay_name1} -#' will be disabled.) -#' -#' @param assay_name2 a single \code{character} value for specifying which -#' assay of experiment 2 to use for calculation. -#' (Please use \code{assay.type2} instead. At some point \code{assay_name2} -#' will be disabled.) +#' +#' @param assay_name1 Deprecated. Use \code{assay.type1} instead. +#' +#' @param assay_name2 Deprecated. Use \code{assay.type2} instead. #' #' @param altexp1 A single numeric or character value specifying alternative experiment #' from the altExp of experiment 1. If NULL, then the experiment is itself @@ -44,13 +38,17 @@ #' and altExp option is disabled. #' (By default: \code{altexp2 = NULL}) #' -#' @param colData_variable1 A character value specifying column(s) from colData -#' of experiment 1. If colData_variable1 is used, assay.type1 is disabled. 
-#' (By default: \code{colData_variable1 = NULL}) +#' @param col.var1 A character value specifying column(s) from colData +#' of experiment 1. If col.var1 is used, assay.type1 is disabled. +#' (By default: \code{col.var1 = NULL}) +#' +#' @param colData_variable1 Deprecated. Use \code{col.var1} instead. #' -#' @param colData_variable2 A character value specifying column(s) from colData -#' of experiment 2. If colData_variable2 is used, assay.type2 is disabled. -#' (By default: \code{colData_variable2 = NULL}) +#' @param col.var2 A character value specifying column(s) from colData +#' of experiment 2. If col.var2 is used, assay.type2 is disabled. +#' (By default: \code{col.var2 = NULL}) +#' +#' @param colData_variable2 Deprecated. Use \code{col.var2} instead. #' #' @param MARGIN A single numeric value for selecting if association are calculated #' row-wise / for features (1) or column-wise / for samples (2). Must be \code{1} or @@ -63,36 +61,48 @@ #' @param mode A single character value for selecting output format #' Available formats are 'table' and 'matrix'. (By default: \code{mode = "table"}) #' -#' @param p_adj_method A single character value for selecting adjustment method of +#' @param p.adj.method A single character value for selecting adjustment method of #' p-values. Passed to \code{p.adjust} function. -#' (By default: \code{p_adj_method = "fdr"}) +#' (By default: \code{p.adj.method = "fdr"}) #' -#' @param p_adj_threshold A single numeric value (from 0 to 1) for selecting +#' @param p_adj_method Deprecated. Use \code{p.adj.method} instead. +#' +#' @param p.adj.threshold A single numeric value (from 0 to 1) for selecting #' adjusted p-value threshold for filtering. -#' (By default: \code{p_adj_threshold = NULL}) +#' (By default: \code{p.adj.threshold = NULL}) +#' +#' @param p_adj_threshold Deprecated. Use \code{p.adj.threshold} instead. 
#' -#' @param cor_threshold A single numeric absolute value (from 0 to 1) for selecting +#' @param cor.threshold A single numeric absolute value (from 0 to 1) for selecting #' correlation threshold for filtering. -#' (By default: \code{cor_threshold = NULL}) +#' (By default: \code{cor.threshold = NULL}) +#' +#' @param cor_threshold Deprecated. Use \code{cor.threshold} instead. #' #' @param sort A single boolean value for selecting whether to sort features or not #' in result matrices. Used method is hierarchical clustering. #' (By default: \code{sort = FALSE}) #' -#' @param filter_self_correlations A single boolean value for selecting whether to +#' @param filter.self.cor A single boolean value for selecting whether to #' filter out correlations between identical items. Applies only when correlation #' between experiment itself is tested, i.e., when assays are identical. -#' (By default: \code{filter_self_correlations = FALSE}) +#' (By default: \code{filter.self.cor = FALSE}) +#' +#' @param filter_self_correlations Deprecated. Use \code{filter.self.cor} instead. #' #' @param verbose A single boolean value for selecting whether to get messages #' about progress of calculation. #' -#' @param test_significance A single boolean value for selecting whether to test +#' @param test.signif A single boolean value for selecting whether to test #' statistical significance of associations. -#' (By default: \code{test_significance = FALSE}) +#' (By default: \code{test.signif = FALSE}) +#' +#' @param test_significance Deprecated. Use \code{test.signif} instead. #' -#' @param show_warnings A single boolean value for selecting whether to show warnings +#' @param show.warnings A single boolean value for selecting whether to show warnings #' that might occur when correlations and p-values are calculated. +#' +#' @param show_warnings Deprecated. Use \code{show.warnings} instead. #' #' @param paired A single boolean value for specifying if samples are paired or not. 
#' \code{colnames} must match between twp experiments. \code{paired} is disabled @@ -105,7 +115,7 @@ #' are calculated only for unique variable-pairs, and they are assigned to #' corresponding variable-pair. This decreases the number of calculations in 2-fold #' meaning faster execution. (By default: \code{symmetric = FALSE}) -#' \item \code{association_FUN}: A function that is used to calculate (dis-)similarity +#' \item \code{association.fun}: A function that is used to calculate (dis-)similarity #' between features. Function must take matrix as an input and give numeric #' values as an output. Adjust \code{method} and other parameters correspondingly. #' Supported functions are, for example, \code{stats::dist} and \code{vegan::vegdist}. @@ -115,7 +125,7 @@ #' The function \code{getCrossAssociation} calculates associations between #' features of two experiments. By default, it not only computes associations #' but also tests their significance. If desired, setting -#' \code{test_significance} to FALSE disables significance calculation. +#' \code{test.signif} to FALSE disables significance calculation. #' #' We recommend the non-parametric Kendall's tau as the default method for association #' analysis. 
Kendall's tau has desirable statistical properties and robustness at lower @@ -164,15 +174,15 @@ #' # Show first 5 entries #' head(result, 5) #' -#' # If test_significance = TRUE, then getCrossAssociation additionally returns +#' # If test.signif = TRUE, then getCrossAssociation additionally returns #' # significances -#' # filter_self_correlations = TRUE filters self correlations -#' # p_adj_threshold can be used to filter those features that do not +#' # filter.self.cor = TRUE filters self correlations +#' # p.adj.threshold can be used to filter those features that do not #' # have any correlations whose p-value is lower than the threshold #' result <- getCrossAssociation(mae[[1]], experiment2 = mae[[1]], method = "pearson", -#' filter_self_correlations = TRUE, -#' p_adj_threshold = 0.05, -#' test_significance = TRUE) +#' filter.self.cor = TRUE, +#' p.adj.threshold = 0.05, +#' test.signif = TRUE) #' # Show first 5 entries #' head(result, 5) #' @@ -182,7 +192,7 @@ #' # Calculate Bray-Curtis dissimilarity between samples. If dataset includes #' # paired samples, you can use paired = TRUE. #' result <- getCrossAssociation(mae[[1]], mae[[1]], MARGIN = 2, paired = FALSE, -#' association_FUN = vegan::vegdist, +#' association.fun = vegan::vegdist, #' method = "bray") #' #' @@ -212,8 +222,8 @@ #' # named assay.type from assay slot, it fetches a column named colData_variable #' # from colData. 
#' result <- getCrossAssociation(mae[[1]], assay.type1 = "counts", -#' colData_variable2 = c("shannon", "coverage"), -#' test_significance = TRUE) +#' col.var2 = c("shannon", "coverage"), +#' test.signif = TRUE) #' NULL @@ -233,19 +243,27 @@ setMethod("getCrossAssociation", signature = c(x = "MultiAssayExperiment"), assay.type2 = assay_name2, assay_name2 = "counts", altexp1 = NULL, altexp2 = NULL, + col.var1 = colData_variable1, colData_variable1 = NULL, + col.var2 = colData_variable2, colData_variable2 = NULL, MARGIN = 1, method = c("kendall", "spearman", "categorical", "pearson"), mode = "table", + p.adj.method = p_adj_method, p_adj_method = c("fdr", "BH", "bonferroni", "BY", "hochberg", "holm", "hommel", "none"), + p.adj.threshold = p_adj_threshold, p_adj_threshold = NULL, + cor.threshold = cor_threshold, cor_threshold = NULL, sort = FALSE, + filter.self.cor = filter_self_correlations, filter_self_correlations = FALSE, verbose = TRUE, + test.signif = test_significance, test_significance = FALSE, + show.warnings = show_warnings, show_warnings = TRUE, paired = FALSE, ...){ @@ -256,19 +274,19 @@ setMethod("getCrossAssociation", signature = c(x = "MultiAssayExperiment"), assay.type2 = assay.type2, altexp1 = altexp1, altexp2 = altexp2, - colData_variable1 = colData_variable1, - colData_variable2 = colData_variable2, + col.var1 = col.var1, + col.var2 = col.var2, MARGIN = MARGIN, method = method, mode = mode, - p_adj_method = p_adj_method, - p_adj_threshold = p_adj_threshold, - cor_threshold = cor_threshold, + p.adj.method = p.adj.method, + p.adj.threshold = p.adj.threshold, + cor.threshold = cor.threshold, sort = sort, - filter_self_correlations = filter_self_correlations, + filter.self.cor = filter.self.cor, verbose = verbose, - test_significance = test_significance, - show_warnings = show_warnings, + test.signif = test.signif, + show.warnings = show.warnings, paired = paired, ...) 
} @@ -347,20 +365,20 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", assay.type2 = "counts", altexp1 = NULL, altexp2 = NULL, - colData_variable1 = NULL, - colData_variable2 = NULL, + col.var1 = NULL, + col.var2 = NULL, MARGIN = 1, method = c("kendall", "spearman", "categorical", "pearson"), mode = c("table", "matrix"), - p_adj_method = c("fdr", "BH", "bonferroni", "BY", "hochberg", + p.adj.method = c("fdr", "BH", "bonferroni", "BY", "hochberg", "holm", "hommel", "none"), - p_adj_threshold = NULL, - cor_threshold = NULL, + p.adj.threshold = NULL, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE, - test_significance = FALSE, - show_warnings = TRUE, + test.signif = FALSE, + show.warnings = TRUE, paired = FALSE, ...){ ############################# INPUT CHECK ############################## @@ -379,13 +397,13 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", tse2 <- .check_and_get_altExp(tse2, altexp2) # If variables from coldata are specified check them. Otherwise, # check assay.type1 - if( !is.null(colData_variable1) ){ - tse1 <- .check_and_subset_colData_variables(tse1, colData_variable1) + if( !is.null(col.var1) ){ + tse1 <- .check_and_subset_colData_variables(tse1, col.var1) } else{ .check_assay_present(assay.type1, tse1) } - if( !is.null(colData_variable2) ){ - tse2 <- .check_and_subset_colData_variables(tse2, colData_variable2) + if( !is.null(col.var2) ){ + tse2 <- .check_and_subset_colData_variables(tse2, col.var2) } else{ .check_assay_present(assay.type2, tse2) } @@ -394,38 +412,38 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", stop("'MARGIN' must be 1 or 2.", call. = FALSE) } # Check method - # method is checked in .calculate_association. Otherwise association_FUN would + # method is checked in .calculate_association. Otherwise association.fun would # not work. (It can be "anything", and it might also have method parameter.) 
# Check mode mode <- match.arg(mode, c("table", "matrix")) - p_adj_method <- match.arg(p_adj_method, + p.adj.method <- match.arg(p.adj.method, c("fdr", "BH", "bonferroni", "BY", "hochberg", "holm", "hommel", "none")) - # Check p_adj_threshold - if( !(is.numeric(p_adj_threshold) && - (p_adj_threshold>=0 && p_adj_threshold<=1) || - is.null(p_adj_threshold) ) ){ - stop("'p_adj_threshold' must be a numeric value [0,1].", call. = FALSE) - } - # Check cor_threshold - if( !(is.numeric(cor_threshold) && - (cor_threshold>=0 && cor_threshold<=1) || - is.null(cor_threshold) ) ){ - stop("'cor_threshold' must be a numeric value [0,1].", call. = FALSE) + # Check p.adj.threshold + if( !(is.numeric(p.adj.threshold) && + (p.adj.threshold>=0 && p.adj.threshold<=1) || + is.null(p.adj.threshold) ) ){ + stop("'p.adj.threshold' must be a numeric value [0,1].", call. = FALSE) + } + # Check cor.threshold + if( !(is.numeric(cor.threshold) && + (cor.threshold>=0 && cor.threshold<=1) || + is.null(cor.threshold) ) ){ + stop("'cor.threshold' must be a numeric value [0,1].", call. = FALSE) } # Check sort if( !.is_a_bool(sort) ){ stop("'sort' must be a boolean value.", call. = FALSE) } - # Check filter_self_correlations - if( !.is_a_bool(filter_self_correlations) ){ - stop("'filter_self_correlations' must be a boolean value.", + # Check filter.self.cor + if( !.is_a_bool(filter.self.cor) ){ + stop("'filter.self.cor' must be a boolean value.", call. = FALSE) } - # Check test_significance - if( !.is_a_bool(test_significance) ){ - stop("'test_significance' must be a boolean value.", + # Check test.signif + if( !.is_a_bool(test.signif) ){ + stop("'test.signif' must be a boolean value.", call. = FALSE) } # Check verbose @@ -433,9 +451,9 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", stop("'verbose' must be a boolean value.", call. 
= FALSE) } - # Check show_warnings - if( !.is_a_bool(show_warnings) ){ - stop("'show_warnings' must be a boolean value.", + # Check show.warnings + if( !.is_a_bool(show.warnings) ){ + stop("'show.warnings' must be a boolean value.", call. = FALSE) } # Check paired @@ -446,14 +464,14 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", ############################ INPUT CHECK END ########################### # Fetch assays to correlate, if variables from coldata are specified, take # coldata, otherwise take assay - if( !is.null(colData_variable1) ){ + if( !is.null(col.var1) ){ assay1 <- colData(tse1) assay1 <- as.matrix(assay1) assay1 <- t(assay1) } else{ assay1 <- assay(tse1, assay.type1) } - if( !is.null(colData_variable2) ){ + if( !is.null(col.var2) ){ assay2 <- colData(tse2) assay2 <- as.matrix(assay2) assay2 <- t(assay2) @@ -472,43 +490,43 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # Check that assays match .check_that_assays_match(assay1, assay2, MARGIN) - # If significance is not calculated, p_adj_method is NULL - if( !test_significance ){ - p_adj_method <- NULL + # If significance is not calculated, p.adj.method is NULL + if( !test.signif ){ + p.adj.method <- NULL } # Calculate correlations result <- .calculate_association(assay1, assay2, method, - p_adj_method, - test_significance, - show_warnings, paired, + p.adj.method, + test.signif, + show.warnings, paired, verbose, MARGIN, assay.type1, assay.type2, altexp1, altexp2, - colData_variable1, colData_variable2, + col.var1, col.var2, ...) 
- # Disable p_adj_threshold if there is no adjusted p-values + # Disable p.adj.threshold if there is no adjusted p-values if( is.null(result$p_adj) ){ - p_adj_threshold <- NULL + p.adj.threshold <- NULL } - # Disable cor_threshold if there is no correlations + # Disable cor.threshold if there is no correlations if( is.null(result$cor) ){ - cor_threshold <- NULL + cor.threshold <- NULL } # Disable filter_self_correlation if assays are not the same if( !identical(assay1, assay2) ){ - filter_self_correlations <- FALSE + filter.self.cor <- FALSE } # Do filtering - if( !is.null(p_adj_threshold) || - !is.null(cor_threshold) || - filter_self_correlations ){ + if( !is.null(p.adj.threshold) || + !is.null(cor.threshold) || + filter.self.cor ){ # Filter associations result <- .association_filter(result, - p_adj_threshold, - cor_threshold, + p.adj.threshold, + cor.threshold, assay1, assay2, - filter_self_correlations, + filter.self.cor, verbose) } # Matrix or table? @@ -740,34 +758,35 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", .calculate_association <- function(assay1, assay2, method = c("kendall", "spearman", "categorical", "pearson"), - p_adj_method, - test_significance, - show_warnings, + p.adj.method, + test.signif, + show.warnings, paired, verbose, MARGIN, assay.type1, assay.type2, altexp1, altexp2, - colData_variable1, colData_variable2, + col.var1, col.var2, + association.fun = association_FUN, association_FUN = NULL, ...){ - # Check method if association_FUN is not NULL - if( is.null(association_FUN) ){ + # Check method if association.fun is not NULL + if( is.null(association.fun) ){ method <- match.arg(method) # Get function name for message function_name <- ifelse(method == "categorical", "mia:::.calculate_gktau", - ifelse(test_significance, "stats::cor.test", "stats::cor")) + ifelse(test.signif, "stats::cor.test", "stats::cor")) # Test if data is in right format .cross_association_test_data_type(assay1, method, - colData_variable1) + 
col.var1) .cross_association_test_data_type(assay2, method, - colData_variable2) + col.var2) } else{ # Get name of function - function_name <- deparse(substitute(association_FUN)) - test_significance <- FALSE - p_adj_method <- NULL + function_name <- deparse(substitute(association.fun)) + test.signif <- FALSE + p.adj.method <- NULL } # Message details of calculation @@ -776,28 +795,28 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", "Calculating correlations...\n", "altexp1: ", ifelse(!is.null(altexp1), altexp1, "-"), ", altexp2: ", ifelse(!is.null(altexp2), altexp2, "-"), - ifelse(!is.null(colData_variable1), - paste0(", assay.type1: -, colData_variable1: ", - paste(colData_variable1, collapse = " + ")), - paste0(", assay.type1: ", assay.type1, ", colData_variable1: -")), - ifelse(!is.null(colData_variable2), - paste0(", assay.type2: -, colData_variable2: ", - paste(colData_variable2, collapse = " + ")), - paste0(", assay.type2: ", assay.type2, ", colData_variable2: -")), + ifelse(!is.null(col.var1), + paste0(", assay.type1: -, col.var1: ", + paste(col.var1, collapse = " + ")), + paste0(", assay.type1: ", assay.type1, ", col.var1: -")), + ifelse(!is.null(col.var2), + paste0(", assay.type2: -, col.var2: ", + paste(col.var2, collapse = " + ")), + paste0(", assay.type2: ", assay.type2, ", col.var2: -")), "\nMARGIN: ", MARGIN, ", function: ", function_name, ", method: ", method, - ", test_significance: ", test_significance, - ", p_adj_method: ", - ifelse(!is.null(p_adj_method), p_adj_method, "-"), + ", test.signif: ", test.signif, + ", p.adj.method: ", + ifelse(!is.null(p.adj.method), p.adj.method, "-"), ", paired: ", paired, - ", show_warnings: ", show_warnings, "\n" + ", show.warnings: ", show.warnings, "\n" ) } - # If association_FUN is provided by user, use appropriate function. + # If association.fun is provided by user, use appropriate function. 
# Otherwise, choose correct method for numeric and categorical data - if( !is.null(association_FUN) ){ + if( !is.null(association.fun) ){ FUN_ <- .calculate_association_with_own_function } else if( method %in% c("kendall", "pearson","spearman") ) { FUN_ <- .calculate_association_for_numeric_values @@ -820,16 +839,16 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", correlations_and_p_values <- .calculate_stats_cor(assay1 = assay1, assay2 = assay2, method = method, - show_warnings = show_warnings) + show.warnings = show.warnings) } else{ correlations_and_p_values <- .calculate_association_table(variable_pairs = variable_pairs, FUN_ = FUN_, - test_significance = test_significance, + test.signif = test.signif, assay1 = assay1, assay2 = assay2, method = method, - show_warnings = show_warnings, - association_FUN = association_FUN, + show.warnings = show.warnings, + association.fun = association.fun, ...) } @@ -847,7 +866,7 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", if( !is.null(correlations_and_p_values$pval) ){ correlations_and_p_values$p_adj <- p.adjust(correlations_and_p_values$pval, - method = p_adj_method) + method = p.adj.method) } return(correlations_and_p_values) } @@ -883,12 +902,12 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # values in cor. Additionally, table can also include pval for p-values. 
.calculate_association_table <- function(variable_pairs, FUN_, - test_significance, + test.signif, assay1, assay2, method, - show_warnings, - association_FUN, + show.warnings, + association.fun, symmetric = FALSE, ...){ # Check symmetric @@ -920,12 +939,12 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # Calculate correlations correlations_and_p_values <- apply(variable_pairs, 1, FUN = FUN_, - test_significance = test_significance, + test.signif = test.signif, assay1 = assay1, assay2 = assay2, method = method, - show_warnings = show_warnings, - association_FUN = association_FUN, + show.warnings = show.warnings, + association.fun = association.fun, ...) # Convert into data.frame if it is vector, @@ -984,11 +1003,11 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # Output: correlation table #' @importFrom stats cor #' @importFrom tidyr pivot_longer -.calculate_stats_cor <- function(assay1, assay2, method, show_warnings){ +.calculate_stats_cor <- function(assay1, assay2, method, show.warnings){ # If user does not want warnings, # suppress warnings that might occur when calculating correlations (NAs...) # or p-values (ties, and exact p-values cannot be calculated...) - if( show_warnings ){ + if( show.warnings ){ correlations <- stats::cor(assay1, assay2, method = method, use = "pairwise.complete.obs") @@ -1025,8 +1044,8 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # Input: Vector of names that belong to feature pair, and assays. # Output: Correlation value or list that includes correlation value and p-value. 
#' @importFrom stats cor.test -.calculate_association_for_numeric_values <- function(feature_pair, test_significance, - assay1, assay2, method, show_warnings, +.calculate_association_for_numeric_values <- function(feature_pair, test.signif, + assay1, assay2, method, show.warnings, ...){ # Get features feature1 <- assay1[ , feature_pair[1]] @@ -1036,7 +1055,7 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # If user does not want warnings, # suppress warnings that might occur when calculating correlations (NAs...) # or p-values (ties, and exact p-values cannot be calculated...) - if( show_warnings ){ + if( show.warnings ){ temp <- cor.test(feature1, feature2, method = method, @@ -1063,10 +1082,10 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # Output: Correlation value or list that includes correlation value and p-value. .calculate_association_for_categorical_values <- function(feature_pair, - test_significance, + test.signif, assay1, assay2, - show_warnings, + show.warnings, ...){ # Get features feature1 <- assay1[ , feature_pair[1]] @@ -1078,10 +1097,10 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # Calculate cross-correlation using Goodman and Kruskal tau temp <- .calculate_gktau(feature1, feature2, - test_significance = test_significance, - show_warnings) + test.signif = test.signif, + show.warnings) # Whether to test significance - if( test_significance ){ + if( test.signif ){ # Take correlation and p-value temp <- c(temp$estimate, temp$p.value) } else{ @@ -1099,9 +1118,9 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # Output: Correlation value or list that includes correlation value and p-value. 
.calculate_association_with_own_function <- function(feature_pair, assay1, assay2, - association_FUN, - show_warnings, - test_significance, + association.fun, + show.warnings, + test.signif, ...){ # Get features feature1 <- assay1[ , feature_pair[1]] @@ -1113,23 +1132,23 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # suppress warnings that might occur when calculating correlations (NAs...) # or p-values (ties, and exact p-values cannot be calculated...) # Use try-catch to catch errors that might occur. - if( show_warnings ){ + if( show.warnings ){ temp <- tryCatch({ - do.call(association_FUN, args = c(list(feature_mat), list(...))) + do.call(association.fun, args = c(list(feature_mat), list(...))) }, error = function(cond) { stop("Error occurred during calculation. Check, e.g., that ", - "'association_FUN' fulfills requirements. 'association_FUN' ", + "'association.fun' fulfills requirements. 'association.fun' ", "threw a following error:\n", cond, call. = FALSE) }) } else { temp <- tryCatch({ - suppressWarnings( do.call(association_FUN, args = c(list(feature_mat), list(...))) ) + suppressWarnings( do.call(association.fun, args = c(list(feature_mat), list(...))) ) }, error = function(cond) { stop("Error occurred during calculation. Check, e.g., that ", - "'association_FUN' fulfills requirements. 'association_FUN' ", + "'association.fun' fulfills requirements. 'association.fun' ", "threw a following error:\n", cond, call. = FALSE) }) @@ -1138,7 +1157,7 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # If temp's length is not 1, then function does not return single numeric value for each pair if( length(temp) != 1 ){ stop("Error occurred during calculation. Check that ", - "'association_FUN' fulfills requirements.", + "'association.fun' fulfills requirements.", call. 
= FALSE) } return(temp) @@ -1150,32 +1169,32 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # Input: Correlation table and thresholds # Output: Filtered correlation table (or NULL if there are no observations after filtering) .association_filter <- function(result, - p_adj_threshold, - cor_threshold, + p.adj.threshold, + cor.threshold, assay1, assay2, - filter_self_correlations, + filter.self.cor, verbose){ # Give message if verbose == TRUE if(verbose){ message( "Filtering results...\np_adj_threshold: ", - ifelse(!is.null(p_adj_threshold), p_adj_threshold, "-"), - ", cor_threshold: ", - ifelse(!is.null(cor_threshold), cor_threshold, "-"), - ", filter_self_correlations: ", - ifelse(filter_self_correlations, - filter_self_correlations, "-"), "\n" ) + ifelse(!is.null(p.adj.threshold), p.adj.threshold, "-"), + ", cor.threshold: ", + ifelse(!is.null(cor.threshold), cor.threshold, "-"), + ", filter.self.cor: ", + ifelse(filter.self.cor, + filter.self.cor, "-"), "\n" ) } # Which features have significant correlations? - if ( !is.null(result$p_adj) && !is.null(p_adj_threshold) ) { + if ( !is.null(result$p_adj) && !is.null(p.adj.threshold) ) { # Get those feature-pairs that have significant correlations - result <- result[result$p_adj < p_adj_threshold & !is.na(result$p_adj), ] + result <- result[result$p_adj < p.adj.threshold & !is.na(result$p_adj), ] } # Which features have correlation over correlation threshold? 
- if ( !is.null(cor_threshold) ) { + if ( !is.null(cor.threshold) ) { # Get those feature-pairs that have correlations over threshold - result <- result[abs(result$cor) > cor_threshold & !is.na(result$cor), ] + result <- result[abs(result$cor) > cor.threshold & !is.na(result$cor), ] } # If there are no significant correlations @@ -1185,7 +1204,7 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", } # Filter self correlations if it's specified - if ( filter_self_correlations ) { + if ( filter.self.cor ) { # Take only those rows where features differ result <- result[result$Var1 != result$Var2, ] } @@ -1383,7 +1402,7 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # Output: list of tau and p-value or just tau #' @importFrom DelayedMatrixStats rowSums2 colSums2 #' @importFrom stats chisq.test -.calculate_gktau <- function(x, y, test_significance = FALSE, show_warnings){ +.calculate_gktau <- function(x, y, test.signif = FALSE, show.warnings){ # First, compute the IxJ contingency table between x and y Nij <- table(x, y, useNA="ifany") # Next, convert this table into a joint probability estimate @@ -1405,14 +1424,14 @@ setMethod("getCrossAssociation", signature = "SummarizedExperiment", # If test significance is specified, then calculate significance with chi-squared test. # Are these two features independent or not? - if ( !test_significance ){ + if ( !test.signif ){ return(list(estimate = tau)) } # Do the Pearson's chi-squared test. # If user does not want warnings, # suppress warnings that might occur when there are ties, and exact p-value # cant be calculated - if( show_warnings ){ + if( show.warnings ){ temp <- chisq.test(x, y) } else { temp <- suppressWarnings( chisq.test(x, y) ) diff --git a/R/getPrevalence.R b/R/getPrevalence.R index 2bbc1193f..81c276127 100644 --- a/R/getPrevalence.R +++ b/R/getPrevalence.R @@ -9,16 +9,20 @@ #' #' @param detection Detection threshold for absence/presence. 
Either an #' absolute value compared directly to the values of \code{x} or a relative -#' value between 0 and 1, if \code{as_relative = FALSE}. +#' value between 0 and 1, if \code{as.relative = FALSE}. #' -#' @param include_lowest logical scalar: Should the lower boundary of the +#' @param include.lowest logical scalar: Should the lower boundary of the #' detection and prevalence cutoffs be included? (default: \code{FALSE}) +#' +#' @param include_lowest Deprecated. Use \code{include.lowest} instead. #' #' @param sort logical scalar: Should the result be sorted by prevalence? #' (default: \code{FALSE}) #' -#' @param as_relative logical scalar: Should the detection threshold be applied +#' @param as.relative logical scalar: Should the detection threshold be applied #' on compositional (relative) abundances? (default: \code{FALSE}) +#' +#' @param as_relative Deprecated. Use \code{as.relative} instead. #' #' @param assay.type A single character value for selecting the #' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{assay}} @@ -56,7 +60,7 @@ #' the detection threshold. For \code{SummarizedExperiment} objects, the #' prevalence is calculated for the selected taxonomic rank, otherwise for the #' rows. The absolute population prevalence can be obtained by multiplying the -#' prevalence by the number of samples (\code{ncol(x)}). If \code{as_relative = +#' prevalence by the number of samples (\code{ncol(x)}). If \code{as.relative = #' FALSE} the relative frequency (between 0 and 1) is used to check against the #' \code{detection} threshold. 
#' @@ -119,7 +123,7 @@ #' prevalence.frequency <- getPrevalence(tse, #' detection = 0, #' sort = TRUE, -#' as_relative = TRUE) +#' as.relative = TRUE) #' head(prevalence.frequency) #' #' # Get prevalence estimates for phylums @@ -128,7 +132,7 @@ #' rank = "Phylum", #' detection = 0, #' sort = TRUE, -#' as_relative = TRUE) +#' as.relative = TRUE) #' head(prevalence.frequency) #' #' # - to obtain population counts, multiply frequencies with the sample size, @@ -144,7 +148,7 @@ #' rank = "Phylum", #' detection = 10, #' prevalence = 50/100, -#' as_relative = FALSE) +#' as.relative = FALSE) #' head(prevalent) #' #' # Gets a subset of object that includes prevalent taxa @@ -152,7 +156,7 @@ #' rank = "Family", #' detection = 0.001, #' prevalence = 0.55, -#' as_relative = TRUE) +#' as.relative = TRUE) #' altExp(tse, "prevalent") #' #' # getRare returns the inverse @@ -160,7 +164,7 @@ #' rank = "Phylum", #' detection = 1/100, #' prevalence = 50/100, -#' as_relative = TRUE) +#' as.relative = TRUE) #' head(rare) #' #' # Gets a subset of object that includes rare taxa @@ -168,7 +172,7 @@ #' rank = "Class", #' detection = 0.001, #' prevalence = 0.001, -#' as_relative = TRUE) +#' as.relative = TRUE) #' altExp(tse, "rare") #' #' # Names of both experiments, prevalent and rare, can be found from slot @@ -189,7 +193,8 @@ setGeneric("getPrevalence", signature = "x", #' @rdname getPrevalence #' @export setMethod("getPrevalence", signature = c(x = "ANY"), function( - x, detection = 0, include_lowest = FALSE, sort = FALSE, na.rm = TRUE, ...){ + x, detection = 0, include.lowest = include_lowest, include_lowest = FALSE, + sort = FALSE, na.rm = TRUE, ...){ # input check if (!.is_numeric_string(detection)) { stop("'detection' must be a single numeric value or coercible to ", @@ -202,8 +207,8 @@ setMethod("getPrevalence", signature = c(x = "ANY"), function( } # detection <- as.numeric(detection) - if(!.is_a_bool(include_lowest)){ - stop("'include_lowest' must be TRUE or FALSE.", call. 
= FALSE) + if(!.is_a_bool(include.lowest)){ + stop("'include.lowest' must be TRUE or FALSE.", call. = FALSE) } if(!.is_a_bool(sort)){ stop("'sort' must be TRUE or FALSE.", call. = FALSE) @@ -217,7 +222,7 @@ setMethod("getPrevalence", signature = c(x = "ANY"), function( warning(msg, call. = FALSE) } # - if (include_lowest) { + if (include.lowest) { prev <- x >= detection } else { prev <- x > detection @@ -235,7 +240,7 @@ setMethod("getPrevalence", signature = c(x = "ANY"), function( ) .agg_for_prevalence <- function( - x, rank, relabel = FALSE, make_unique = TRUE, na.rm = FALSE, + x, rank, relabel = FALSE, make.unique = TRUE, na.rm = FALSE, agg.na.rm = TRUE, ...){ # Check na.rm. It is not used in this function, it is only catched so that # it can be passed to getPrevalence(matrix) and not use it here in @@ -252,12 +257,12 @@ setMethod("getPrevalence", signature = c(x = "ANY"), function( if(!is.null(rank)){ .check_taxonomic_rank(rank, x) args <- c(list(x = x, rank = rank, na.rm = agg.na.rm), list(...)) - argNames <- c("x","rank","onRankOnly","na.rm","empty.fields", - "archetype","mergeTree","average","BPPARAM") + argNames <- c("x","rank","ignore.taxonomy","na.rm","empty.fields", + "archetype","update.tree","average","BPPARAM") args <- args[names(args) %in% argNames] x <- do.call(agglomerateByRank, args) if(relabel){ - rownames(x) <- getTaxonomyLabels(x, make_unique = make_unique) + rownames(x) <- getTaxonomyLabels(x, make.unique = make.unique) } } x @@ -267,17 +272,17 @@ setMethod("getPrevalence", signature = c(x = "ANY"), function( #' @export setMethod("getPrevalence", signature = c(x = "SummarizedExperiment"), function(x, assay.type = assay_name, assay_name = "counts", - as_relative = FALSE, rank = NULL, ...){ + as.relative = as_relative, as_relative = FALSE, rank = NULL, ...){ # input check - if(!.is_a_bool(as_relative)){ - stop("'as_relative' must be TRUE or FALSE.", call. 
= FALSE) + if(!.is_a_bool(as.relative)){ + stop("'as.relative' must be TRUE or FALSE.", call. = FALSE) } # check assay .check_assay_present(assay.type, x) x <- .agg_for_prevalence(x, rank = rank, ...) mat <- assay(x, assay.type) - if (as_relative) { + if (as.relative) { mat <- .calc_rel_abund(mat) } getPrevalence(mat, ...) @@ -288,7 +293,7 @@ setMethod("getPrevalence", signature = c(x = "SummarizedExperiment"), #' #' @param prevalence Prevalence threshold (in 0 to 1). The #' required prevalence is strictly greater by default. To include the -#' limit, set \code{include_lowest} to \code{TRUE}. +#' limit, set \code{include.lowest} to \code{TRUE}. #' #' @details #' \code{getPrevalent} returns taxa that are more prevalent with the @@ -311,7 +316,7 @@ setGeneric("getPrevalent", signature = "x", } .get_prevalent_indices <- function(x, prevalence = 50/100, - include_lowest = FALSE, ...){ + include.lowest = FALSE, ...){ # input check if (!.is_numeric_string(prevalence)) { stop("'prevalence' must be a single numeric value or coercible to ", @@ -320,8 +325,8 @@ setGeneric("getPrevalent", signature = "x", } prevalence <- as.numeric(prevalence) - if(!.is_a_bool(include_lowest)){ - stop("'include_lowest' must be TRUE or FALSE.", call. = FALSE) + if(!.is_a_bool(include.lowest)){ + stop("'include.lowest' must be TRUE or FALSE.", call. = FALSE) } # rownames must bet set and unique, because if sort = TRUE, the order is # not preserved @@ -329,7 +334,7 @@ setGeneric("getPrevalent", signature = "x", pr <- getPrevalence(x, rank = NULL, ...) 
# get logical vector which row does exceed threshold - if (include_lowest) { + if (include.lowest) { f <- pr >= prevalence } else { f <- pr > prevalence @@ -367,9 +372,10 @@ setGeneric("getPrevalent", signature = "x", #' @rdname getPrevalence #' @export setMethod("getPrevalent", signature = c(x = "ANY"), - function(x, prevalence = 50/100, include_lowest = FALSE, ...){ - .get_prevalent_taxa(x, rank = NULL, prevalence = prevalence, - include_lowest = include_lowest, ...) + function(x, prevalence = 50/100, include.lowest = include_lowest, + include_lowest = FALSE, ...){ + .get_prevalent_taxa(x, rank = NULL, prevalence = prevalence, + include.lowest = include.lowest, ...) } ) @@ -377,9 +383,9 @@ setMethod("getPrevalent", signature = c(x = "ANY"), #' @export setMethod("getPrevalent", signature = c(x = "SummarizedExperiment"), function(x, rank = NULL, prevalence = 50/100, - include_lowest = FALSE, ...){ - .get_prevalent_taxa(x, rank = rank, prevalence = prevalence, - include_lowest = include_lowest, ...) + include.lowest = include_lowest, include_lowest = FALSE, ...){ + .get_prevalent_taxa(x, rank = rank, prevalence = prevalence, + include.lowest = include.lowest, ...) } ) @@ -423,9 +429,10 @@ setGeneric("getRare", signature = "x", #' @rdname getPrevalence #' @export setMethod("getRare", signature = c(x = "ANY"), - function(x, prevalence = 50/100, include_lowest = FALSE, ...){ - .get_rare_taxa(x, rank = NULL, prevalence = prevalence, - include_lowest = include_lowest, ...) + function(x, prevalence = 50/100, include.lowest = include_lowest, + include_lowest = FALSE, ...){ + .get_rare_taxa(x, rank = NULL, prevalence = prevalence, + include.lowest = include.lowest, ...) 
} ) @@ -433,9 +440,9 @@ setMethod("getRare", signature = c(x = "ANY"), #' @export setMethod("getRare", signature = c(x = "SummarizedExperiment"), function(x, rank = NULL, prevalence = 50/100, - include_lowest = FALSE, ...){ - .get_rare_taxa(x, rank = rank, prevalence = prevalence, - include_lowest = include_lowest, ...) + include.lowest = include_lowest, include_lowest = FALSE, ...){ + .get_rare_taxa(x, rank = rank, prevalence = prevalence, + include.lowest = include.lowest, ...) } ) @@ -515,8 +522,10 @@ setMethod("getPrevalentAbundance", signature = c(x = "SummarizedExperiment"), #' @rdname agglomerate-methods #' -#' @param other_label A single \code{character} valued used as the label for the -#' summary of non-prevalent taxa. (default: \code{other_label = "Other"}) +#' @param other.label A single \code{character} valued used as the label for the +#' summary of non-prevalent taxa. (default: \code{other.label = "Other"}) +#' +#' @param other_label Deprecated. use \code{other.label} instead. #' #' @details #' \code{agglomerateByPrevalence} sums up the values of assays at the taxonomic @@ -524,7 +533,7 @@ setMethod("getPrevalentAbundance", signature = c(x = "SummarizedExperiment"), #' available) and selects the summed results that exceed the given population #' prevalence at the given detection level. The other summed values (below the #' threshold) are agglomerated in an additional row taking the name indicated by -#' \code{other_label} (by default "Other"). +#' \code{other.label} (by default "Other"). 
#' #' @return #' \code{agglomerateByPrevalence} returns a taxonomically-agglomerated object @@ -537,7 +546,7 @@ setMethod("getPrevalentAbundance", signature = c(x = "SummarizedExperiment"), #' rank = "Phylum", #' detection = 1/100, #' prevalence = 50/100, -#' as_relative = TRUE) +#' as.relative = TRUE) #' #' tse #' @@ -557,10 +566,10 @@ setGeneric("agglomerateByPrevalence", signature = "x", #' @rdname agglomerate-methods #' @export setMethod("agglomerateByPrevalence", signature = c(x = "SummarizedExperiment"), - function(x, rank = NULL, other_label = "Other", ...){ + function(x, rank = NULL, other.label = other_label, other_label = "Other", ...){ # input check - if(!.is_a_string(other_label)){ - stop("'other_label' must be a single character value.", + if(!.is_a_string(other.label)){ + stop("'other.label' must be a single character value.", call. = FALSE) } # @@ -576,9 +585,9 @@ setMethod("agglomerateByPrevalence", signature = c(x = "SummarizedExperiment"), check_assays = FALSE) rowData(other_x)[,colnames(rowData(other_x))] <- NA # set the other label - rownames(other_x) <- other_label + rownames(other_x) <- other.label if(!is.null(rank)){ - rowData(other_x)[,rank] <- other_label + rowData(other_x)[,rank] <- other.label } # temporary fix until TSE supports rbind class <- c("SingleCellExperiment","RangedSummarizedExperiment", diff --git a/R/importHumann.R b/R/importHumann.R index 63336e6e5..a35e2c1a1 100644 --- a/R/importHumann.R +++ b/R/importHumann.R @@ -3,19 +3,21 @@ #' @param file a single \code{character} value defining the file #' path of the HUMAnN file. The file must be in merged HUMAnN format. #' -#' @param colData a DataFrame-like object that includes sample names in +#' @param col.data a DataFrame-like object that includes sample names in #' rownames, or a single \code{character} value defining the file #' path of the sample metadata file. The file must be in \code{tsv} format -#' (default: \code{colData = NULL}). +#' (default: \code{col.data = NULL}). 
+#' +#' @param colData Deprecated. Use \code{col.data} instead. #' #' @param ... additional arguments: #' \itemize{ #' \item \code{assay.type}: A single character value for naming #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{assay}} #' (default: \code{assay.type = "counts"}) -#' \item \code{removeTaxaPrefixes}: \code{TRUE} or \code{FALSE}: Should +#' \item \code{prefix.rm}: \code{TRUE} or \code{FALSE}: Should #' taxonomic prefixes be removed? (default: -#' \code{removeTaxaPrefixes = FALSE}) +#' \code{prefix.rm = FALSE}) #' \item \code{remove.suffix}: \code{TRUE} or \code{FALSE}: Should #' suffixes of sample names be removed? HUMAnN pipeline adds suffixes #' to sample names. Suffixes are formed from file names. By selecting @@ -70,7 +72,7 @@ #' NULL -importHUMAnN <- function(file, colData = NULL, ...){ +importHUMAnN <- function(file, col.data = colData, colData = NULL, ...){ ################################ Input check ############################### if(!.is_non_empty_string(file)){ stop("'file' must be a single character value.", @@ -79,10 +81,10 @@ importHUMAnN <- function(file, colData = NULL, ...){ if (!file.exists(file)) { stop(file, " does not exist", call. = FALSE) } - if(!is.null(colData) && - !(.is_non_empty_string(colData) || is.data.frame(colData) || - is.matrix(colData) || is(colData, "DataFrame")) ){ - stop("'colData' must be a single character value, DataFrame or NULL.", + if(!is.null(col.data) && + !(.is_non_empty_string(col.data) || is.data.frame(col.data) || + is.matrix(col.data) || is(col.data, "DataFrame")) ){ + stop("'col.data' must be a single character value, DataFrame or NULL.", call. = FALSE) } ############################## Input check end ############################# @@ -92,9 +94,9 @@ importHUMAnN <- function(file, colData = NULL, ...){ data <- .read_humann(file, rowdata_col, ...) # Create TreeSE from the data tse <- .create_tse_from_humann(data, rowdata_col, ...) 
- # Add colData if provided - if( !is.null(colData) ){ - tse <- .add_coldata(tse, colData) + # Add col.data if provided + if( !is.null(col.data) ){ + tse <- .add_coldata(tse, col.data) } return(tse) } diff --git a/R/importMetaphlan.R b/R/importMetaphlan.R index ee2eb4f6c..cd95b14fb 100644 --- a/R/importMetaphlan.R +++ b/R/importMetaphlan.R @@ -3,19 +3,20 @@ #' @param file a single \code{character} value defining the file #' path of the Metaphlan file. The file must be in merged Metaphlan format. #' -#' @param colData a DataFrame-like object that includes sample names in +#' @param col.data a DataFrame-like object that includes sample names in #' rownames, or a single \code{character} value defining the file #' path of the sample metadata file. The file must be in \code{tsv} format -#' (default: \code{colData = NULL}). +#' (default: \code{col.data = NULL}). #' -#' @param sample_meta a DataFrame-like object that includes sample names in -#' rownames, or a single \code{character} value defining the file -#' path of the sample metadata file. The file must be in \code{tsv} format -#' (default: \code{sample_meta = NULL}). +#' @param colData Deprecated. use \code{col.data} instead. +#' +#' @param sample_meta Deprecated. Use \code{col.data} instead. #' -#' @param phy_tree a single \code{character} value defining the file +#' @param tree.file a single \code{character} value defining the file #' path of the phylogenetic tree. -#' (default: \code{phy_tree = NULL}). +#' (default: \code{tree.file = NULL}). +#' +#' @param phy_tree Deprecated. Use \code{tree.file} instead. #' #' @param ... additional arguments: #' \itemize{ @@ -25,9 +26,9 @@ #' \item \code{assay_name}: A single \code{character} value for specifying which #' assay to use for calculation. (Please use \code{assay.type} instead. #' At some point \code{assay_name} will be disabled.) 
-#' \item \code{removeTaxaPrefixes}: \code{TRUE} or \code{FALSE}: Should +#' \item \code{prefix.rm}: \code{TRUE} or \code{FALSE}: Should #' taxonomic prefixes be removed? (default: -#' \code{removeTaxaPrefixes = FALSE}) +#' \code{prefix.rm = FALSE}) #' \item \code{remove.suffix}: \code{TRUE} or \code{FALSE}: Should #' suffixes of sample names be removed? Metaphlan pipeline adds suffixes #' to sample names. Suffixes are formed from file names. By selecting @@ -93,7 +94,8 @@ NULL importMetaPhlAn <- function( - file, colData = sample_meta, sample_meta = NULL, phy_tree = NULL, ...){ + file, col.data = colData, colData = sample_meta, + sample_meta = NULL, tree.file = phy_tree, phy_tree = NULL, ...){ ################################ Input check ################################ if(!.is_non_empty_string(file)){ @@ -103,14 +105,14 @@ importMetaPhlAn <- function( if (!file.exists(file)) { stop(file, " does not exist", call. = FALSE) } - if(!is.null(colData) && - !(.is_non_empty_string(colData) || is.data.frame(colData) || - is.matrix(colData) || is(colData, "DataFrame")) ){ - stop("'colData' must be a single character value, DataFrame or NULL.", + if(!is.null(col.data) && + !(.is_non_empty_string(col.data) || is.data.frame(col.data) || + is.matrix(col.data) || is(col.data, "DataFrame")) ){ + stop("'col.data' must be a single character value, DataFrame or NULL.", call. = FALSE) } - if(!is.null(phy_tree) && !.is_non_empty_string(phy_tree)){ - stop("'phy_tree' must be a single character value or NULL.", + if(!is.null(tree.file) && !.is_non_empty_string(tree.file)){ + stop("'tree.file' must be a single character value or NULL.", call. = FALSE) } ############################## Input check end ############################# @@ -146,14 +148,14 @@ importMetaPhlAn <- function( .set_ranks_based_on_rowdata(tse,...) 
# Load sample meta data if it is provided - if( !is.null(colData) ) { - tse <- .add_coldata(tse, colData) + if( !is.null(col.data) ) { + tse <- .add_coldata(tse, col.data) } # Load tree if it is provided - if (!is.null(phy_tree)) { - tree <- ape::read.tree(phy_tree) + if (!is.null(tree.file)) { + tree <- ape::read.tree(tree.file) rowTree(tse) <- tree } @@ -296,7 +298,7 @@ importMetaPhlAn <- function( return(se) } -# This function can be used to add colData to TreeSE. It checks that sample +# This function can be used to add col.data to TreeSE. It checks that sample # names match (full or partial) and adds the metadata to altExps also. .add_coldata <- function(tse, coldata){ # If the coldata is character specifying the path @@ -332,7 +334,7 @@ importMetaPhlAn <- function( if( !(all(colnames(tse) %in% sample_names) && length(sample_names) == ncol(tse)) ){ warning( - "The sample names in 'colData' do not match with the data. ", + "The sample names in 'col.data' do not match with the data. ", "The sample metadata is not added.", call. = FALSE ) return(tse) @@ -345,7 +347,7 @@ importMetaPhlAn <- function( # Give warning if partial match was used if( !all(rownames(coldata) %in% colnames(tse)) ){ warning("Partial match was used to match sample names between ", - "'colData' and the data. Please check that they are correct.", + "'col.data' and the data. Please check that they are correct.", call. = FALSE ) # Replace colnames with names from sample metadata. They are without diff --git a/R/importMothur.R b/R/importMothur.R index bf391dd47..83e4256f2 100644 --- a/R/importMothur.R +++ b/R/importMothur.R @@ -3,25 +3,31 @@ #' This method creates a \code{TreeSummarizedExperiment} object from \code{Mothur} #' files provided as input. #' -#' @param sharedFile a single \code{character} value defining the file +#' @param assay.file a single \code{character} value defining the file #' path of the feature table to be imported. 
The File has to be in #' \code{shared file} format as defined in Mothur documentation. +#' +#' @param sharedFile Deprecated. Use \code{assay.file} instead. #' -#' @param taxonomyFile a single \code{character} value defining the file path of +#' @param row.file a single \code{character} value defining the file path of #' the taxonomy table to be imported. The File has to be in \code{taxonomy #' file} or \code{constaxonomy file} format as defined in Mothur -#' documentation. (default: \code{taxonomyFile = NULL}). +#' documentation. (default: \code{row.file = NULL}). +#' +#' @param taxonomyFile Deprecated. Use \code{row.file} instead. #' -#' @param designFile a single \code{character} value defining the file path of +#' @param col.file a single \code{character} value defining the file path of #' the sample metadata to be imported. The File has to be in \code{desing -#' file} format as defined in Mothur documentation. (default: \code{designFile +#' file} format as defined in Mothur documentation. (default: \code{col.file #' = NULL}). +#' +#' @param designFile Deprecated. Use \code{col.file} instead. #' #' @details #' Results exported from Mothur can be imported as a #' \code{SummarizedExperiment} using \code{importMothur}. Except for the -#' \code{sharedFile}, the other data types, \code{taxonomyFile}, and -#' \code{designFile}, are optional, but are highly encouraged to be provided. +#' \code{assay.file}, the other data types, \code{row.file}, and +#' \code{col.file}, are optional, but are highly encouraged to be provided. 
#' #' @return A #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} @@ -53,7 +59,7 @@ #' meta <- system.file("extdata", "mothur_example.design", package = "mia") #' #' # Creates se object from files -#' se <- importMothur(counts, taxa, meta) +#' se <- importMothur(assay.file = counts, row.file = taxa, col.file = meta) #' # Convert SE to TreeSE #' tse <- as(se, "TreeSummarizedExperiment") #' tse @@ -63,26 +69,29 @@ NULL #' @importFrom SummarizedExperiment SummarizedExperiment #' @importFrom S4Vectors make_zero_col_DFrame #' @export -importMothur <- function(sharedFile, - taxonomyFile = NULL, - designFile = NULL) { +importMothur <- function(assay.file = sharedFile, + sharedFile, + taxonomyFile = NULL, + row.file = taxonomyFile, + designFile = NULL, + col.file = designFile) { # input check - if(!.is_non_empty_string(sharedFile)){ - stop("'sharedFile' must be a single character value.", + if(!.is_non_empty_string(assay.file)){ + stop("'assay.file' must be a single character value.", call. = FALSE) } - if(!is.null(taxonomyFile) && !.is_non_empty_string(taxonomyFile)){ - stop("'taxonomyFile' must be a single character value or NULL.", + if(!is.null(row.file) && !.is_non_empty_string(row.file)){ + stop("'row.file' must be a single character value or NULL.", call. = FALSE) } - if(!is.null(designFile) && !.is_non_empty_string(designFile)){ - stop("'designFile' must be a single character value or NULL.", + if(!is.null(col.file) && !.is_non_empty_string(col.file)){ + stop("'col.file' must be a single character value or NULL.", call. 
= FALSE) } - # Reads the sharedFile - feature_tab_and_data_to_colData <- .read_mothur_feature(sharedFile) + # Reads the assay.file + feature_tab_and_data_to_colData <- .read_mothur_feature(assay.file) # Extracts feature_tab feature_tab <- feature_tab_and_data_to_colData$assay # Extracts data that goes to colData @@ -90,8 +99,8 @@ importMothur <- function(sharedFile, # If rowData information exists, gets that. Otherwise, tax_tab is just # data.frame without information - if (!is.null(taxonomyFile)) { - taxa_tab <- .read_mothur_taxonomy(taxonomyFile, feature_tab) + if (!is.null(row.file)) { + taxa_tab <- .read_mothur_taxonomy(row.file, feature_tab) } else { taxa_tab <- S4Vectors::make_zero_col_DFrame(nrow(feature_tab)) rownames(taxa_tab) <- rownames(feature_tab) @@ -99,8 +108,8 @@ importMothur <- function(sharedFile, # If colData informationor data_to_colData exists, gets that. Otherwise, # sample_tab is just data frame without information - if (!is.null(designFile) && !is.null(data_to_colData)) { - sample_meta <- .read_mothur_sample_meta(designFile, data_to_colData) + if (!is.null(col.file) && !is.null(data_to_colData)) { + sample_meta <- .read_mothur_sample_meta(col.file, data_to_colData) } else { sample_meta <- S4Vectors::make_zero_col_DFrame(ncol(feature_tab)) rownames(sample_meta) <- colnames(feature_tab) @@ -113,16 +122,16 @@ importMothur <- function(sharedFile, # These extra information must be added to colData. Return list of assay and # extra info -.read_mothur_feature <- function(sharedFile){ +.read_mothur_feature <- function(assay.file){ - if (!.is_mothur_shared_file(sharedFile)) { - stop("The input '", sharedFile, "' must be in `shared` format.", + if (!.is_mothur_shared_file(assay.file)) { + stop("The input '", assay.file, "' must be in `shared` format.", call. 
= FALSE) } # Stores name of columns will be included in colData not in assays MOTHUR_NON_ASSAY_COLS <- c("label","numOtus","Group") - data <- read.table(sharedFile, check.names=FALSE, header=TRUE, + data <- read.table(assay.file, check.names=FALSE, header=TRUE, sep="\t", stringsAsFactors=FALSE) # Checks that colnames contain information and it is not NULL if ( !(length(colnames(data)) > 0) || is.null(colnames(data)) ){ @@ -140,25 +149,25 @@ importMothur <- function(sharedFile, colData = colData)) } -.read_mothur_taxonomy <- function(taxonomyFile, feature_tab){ +.read_mothur_taxonomy <- function(row.file, feature_tab){ # If the file is in "cons.taxonomy" format - if (.is_mothur_constaxonomy_file(taxonomyFile, feature_tab)) { - data <- read.table(taxonomyFile, check.names=FALSE, + if (.is_mothur_constaxonomy_file(row.file, feature_tab)) { + data <- read.table(row.file, check.names=FALSE, header=TRUE, sep="\t", stringsAsFactors=FALSE) } # If the file is in "taxonomy" format, adds column names - else if (.is_mothur_taxonomy_file(taxonomyFile, feature_tab)){ - data <- read.table(taxonomyFile, check.names=FALSE, + else if (.is_mothur_taxonomy_file(row.file, feature_tab)){ + data <- read.table(row.file, check.names=FALSE, header=FALSE, sep="\t", stringsAsFactors=FALSE, col.names = c("OTU", "Taxonomy")) } # Else the file is not either gives an error else{ - stop("The input '", taxonomyFile, "' must be provided in the ", + stop("The input '", row.file, "' must be provided in the ", "`taxonomy` or `cons.taxonomy` format. In addition, it must ", - "match the data of the 'sharedFile'", + "match the data of the 'assay.file'", call. = FALSE) } @@ -192,17 +201,17 @@ importMothur <- function(sharedFile, return(rowData) } -.read_mothur_sample_meta <- function(designFile, data_to_colData){ +.read_mothur_sample_meta <- function(col.file, data_to_colData){ # Checks if file is in "design" format. data_to_colData$Group includes - # sample names that were extracted from assay, i.e. 
sharedFile - if (!.is_mothur_design_file(designFile, data_to_colData$Group)) { - stop("The input '", designFile, "' must be in `design` format, - and it must inlude same sample names as 'sharedFile'.", + # sample names that were extracted from assay, i.e. assay.file + if (!.is_mothur_design_file(col.file, data_to_colData$Group)) { + stop("The input '", col.file, "' must be in `design` format, + and it must inlude same sample names as 'assay.file'.", call. = FALSE) } # Reads the file - colData <- read.table(designFile, check.names=FALSE, + colData <- read.table(col.file, check.names=FALSE, header=TRUE, sep="\t", stringsAsFactors=FALSE) diff --git a/R/importQIIME2.R b/R/importQIIME2.R index a5ca1d9ab..2f65728c3 100644 --- a/R/importQIIME2.R +++ b/R/importQIIME2.R @@ -2,49 +2,61 @@ #' #' Results exported from QIMME2 can be imported as a #' \code{TreeSummarizedExperiment} using \code{importQIIME2}. Except for the -#' \code{featureTableFile}, the other data types, \code{taxonomyTableFile}, -#' \code{refSeqFile} and \code{phyTreeFile}, are optional, but are highly +#' \code{assay.file}, the other data types, \code{row.file}, +#' \code{refseq.file} and \code{tree.file}, are optional, but are highly #' encouraged to be provided. #' -#' @param featureTableFile a single \code{character} value defining the file +#' @param assay.file a single \code{character} value defining the file #' path of the feature table to be imported. +#' +#' @param featureTableFile Deprecated. use \code{assay.file} instead. #' -#' @param taxonomyTableFile a single \code{character} value defining the file +#' @param row.file a single \code{character} value defining the file #' path of the taxonomy table to be imported. (default: -#' \code{taxonomyTableFile = NULL}). +#' \code{row.file = NULL}). +#' +#' @param taxonomyTableFile Deprecated. use \code{row.file} instead. 
#' -#' @param sampleMetaFile a single \code{character} value defining the file path +#' @param col.file a single \code{character} value defining the file path #' of the sample metadata to be imported. The file has to be in tsv format. -#' (default: \code{sampleMetaFile = NULL}). +#' (default: \code{col.file = NULL}). +#' +#' @param sampleMetaFile Deprecated. Use \code{col.file} instead. #' -#' @param featureNamesAsRefSeq \code{TRUE} or \code{FALSE}: Should the feature +#' @param as.refseq \code{TRUE} or \code{FALSE}: Should the feature #' names of the feature table be regarded as reference sequences? This setting -#' will be disregarded, if \code{refSeqFile} is not \code{NULL}. If the +#' will be disregarded, if \code{refseq.file} is not \code{NULL}. If the #' feature names do not contain valid DNA characters only, the reference #' sequences will not be set. +#' +#' @param featureNamesAsRefSeq Deprecated. Use \code{as.refseq} instead. #' -#' @param refSeqFile a single \code{character} value defining the file path of -#' the reference sequences for each feature. (default: \code{refSeqFile = +#' @param refseq.file a single \code{character} value defining the file path of +#' the reference sequences for each feature. (default: \code{refseq.file = #' NULL}). +#' +#' @param refSeqFile Deprecated. Use \code{refseq.file} instead. #' -#' @param phyTreeFile a single \code{character} value defining the file path of -#' the phylogenetic tree. (default: \code{phyTreeFile = NULL}). +#' @param tree.file a single \code{character} value defining the file path of +#' the phylogenetic tree. (default: \code{tree.file = NULL}). +#' +#' @param phyTreeFile Deprecated. Use \code{tree.file} instead. #' #' @param ... additional arguments: #' \itemize{ -#' \item \code{temp}: the temporary directory used for decompressing the +#' \item \code{temp.dir}: the temporary directory used for decompressing the #' data.
(default: \code{tempdir()}) -#' \item \code{removeTaxaPrefixes}: \code{TRUE} or \code{FALSE}: Should +#' \item \code{prefix.rm}: \code{TRUE} or \code{FALSE}: Should #' taxonomic prefixes be removed? (default: -#' \code{removeTaxaPrefixes = FALSE}) +#' \code{prefix.rm = FALSE}) #' } #' #' @details -#' Both arguments \code{featureNamesAsRefSeq} and \code{refSeqFile} can be used -#' to define reference sequences of features. \code{featureNamesAsRefSeq} is -#' only taken into account, if \code{refSeqFile} is \code{NULL}. No reference +#' Both arguments \code{as.refseq} and \code{refseq.file} can be used +#' to define reference sequences of features. \code{as.refseq} is +#' only taken into account, if \code{refseq.file} is \code{NULL}. No reference #' sequences are tried to be created, if \code{featureNameAsRefSeq} is -#' \code{FALSE} and \code{refSeqFile} is \code{NULL}. +#' \code{FALSE} and \code{refseq.file} is \code{NULL}. #' #' @return A #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}} @@ -68,83 +80,89 @@ #' \url{https://qiime2.org} #' #' @examples -#' featureTableFile <- system.file("extdata", "table.qza", package = "mia") -#' taxonomyTableFile <- system.file("extdata", "taxonomy.qza", package = "mia") -#' sampleMetaFile <- system.file("extdata", "sample-metadata.tsv", package = "mia") -#' phyTreeFile <- system.file("extdata", "tree.qza", package = "mia") -#' refSeqFile <- system.file("extdata", "refseq.qza", package = "mia") +#' assay.file <- system.file("extdata", "table.qza", package = "mia") +#' row.file <- system.file("extdata", "taxonomy.qza", package = "mia") +#' col.file <- system.file("extdata", "sample-metadata.tsv", package = "mia") +#' tree.file <- system.file("extdata", "tree.qza", package = "mia") +#' refseq.file <- system.file("extdata", "refseq.qza", package = "mia") #' tse <- importQIIME2( -#' featureTableFile = featureTableFile, -#' taxonomyTableFile = taxonomyTableFile, -#' sampleMetaFile = 
sampleMetaFile, -#' refSeqFile = refSeqFile, -#' phyTreeFile = phyTreeFile +#' assay.file = assay.file, +#' row.file = row.file, +#' col.file = col.file, +#' refseq.file = refseq.file, +#' tree.file = tree.file #' ) #' #' tse #' @importFrom S4Vectors make_zero_col_DFrame -importQIIME2 <- function(featureTableFile, +importQIIME2 <- function(assay.file = featureTableFile, + featureTableFile, + row.file = taxonomyTableFile, taxonomyTableFile = NULL, + col.file = sampleMetaFile, sampleMetaFile = NULL, + as.refseq = featureNamesAsRefSeq, featureNamesAsRefSeq = TRUE, + refseq.file = refSeqFile, refSeqFile = NULL, + tree.file = phyTreeFile, phyTreeFile = NULL, ...) { .require_package("yaml") # input check - if(!.is_non_empty_string(featureTableFile)){ - stop("'featureTableFile' must be a single character value.", + if(!.is_non_empty_string(assay.file)){ + stop("'assay.file' must be a single character value.", call. = FALSE) } - if(!is.null(taxonomyTableFile) && !.is_non_empty_string(taxonomyTableFile)){ - stop("'taxonomyTableFile' must be a single character value or NULL.", + if(!is.null(row.file) && !.is_non_empty_string(row.file)){ + stop("'row.file' must be a single character value or NULL.", call. = FALSE) } - if(!is.null(sampleMetaFile) && !.is_non_empty_string(sampleMetaFile)){ - stop("'sampleMetaFile' must be a single character value or NULL.", + if(!is.null(col.file) && !.is_non_empty_string(col.file)){ + stop("'col.file' must be a single character value or NULL.", call. = FALSE) } - if(!.is_a_bool(featureNamesAsRefSeq)){ - stop("'featureNamesAsRefSeq' must be TRUE or FALSE.", call. = FALSE) + if(!.is_a_bool(as.refseq)){ + stop("'as.refseq' must be TRUE or FALSE.", call. = FALSE) } - if(!is.null(refSeqFile) && !.is_non_empty_string(refSeqFile)){ - stop("'refSeqFile' must be a single character value or NULL.", + if(!is.null(refseq.file) && !.is_non_empty_string(refseq.file)){ + stop("'refseq.file' must be a single character value or NULL.", call. 
= FALSE) } - if(!is.null(phyTreeFile) && !.is_non_empty_string(phyTreeFile)){ - stop("'phyTreeFile' must be a single character value or NULL.", + if(!is.null(tree.file) && !.is_non_empty_string(tree.file)){ + stop("'tree.file' must be a single character value or NULL.", call. = FALSE) } # - feature_tab <- importQZA(featureTableFile, ...) + feature_tab <- importQZA(assay.file, ...) - if (!is.null(taxonomyTableFile)) { - taxa_tab <- importQZA(taxonomyTableFile, ...) + if (!is.null(row.file)) { + taxa_tab <- importQZA(row.file, ...) taxa_tab <- .subset_taxa_in_feature(taxa_tab, feature_tab) } else { taxa_tab <- S4Vectors::make_zero_col_DFrame(nrow(feature_tab)) rownames(taxa_tab) <- rownames(feature_tab) } - if (!is.null(sampleMetaFile)) { - sample_meta <- .read_q2sample_meta(sampleMetaFile) + if (!is.null(col.file)) { + sample_meta <- .read_q2sample_meta(col.file) } else { sample_meta <- S4Vectors::make_zero_col_DFrame(ncol(feature_tab)) rownames(sample_meta) <- colnames(feature_tab) } - if (!is.null(phyTreeFile)) { - tree <- importQZA(phyTreeFile, ...) + if (!is.null(tree.file)) { + tree <- importQZA(tree.file, ...) } else { tree <- NULL } # if row.names(feature_tab) is a DNA sequence, set it as refseq - if (!is.null(refSeqFile)){ - refseq <- importQZA(refSeqFile, ...) - } else if (featureNamesAsRefSeq) { + if (!is.null(refseq.file)){ + refseq <- importQZA(refseq.file, ...) + } else if (as.refseq) { refseq <- .rownames_as_dna_seq(rownames(feature_tab)) } else { refseq <- NULL @@ -169,8 +187,11 @@ importQIIME2 <- function(featureTableFile, #' table), `NewickDirectoryFormat` (phylogenetic tree ) and #' `DNASequencesDirectoryFormat` (representative sequences) are supported #' right now. -#' @param temp character, a temporary directory in which the qza file will be +#' @param temp.dir character, a temporary directory in which the qza file will be #' decompressed to, default `tempdir()`. +#' +#' @param temp Deprecated. Use \code{temp.dir} instead.
+#' #' @return `matrix` object for feature table, `DataFrame` for taxonomic table, #' [`ape::phylo`] object for phylogenetic tree, #' [`Biostrings::DNAStringSet-class`] for representative sequences of taxa. @@ -180,13 +201,13 @@ importQIIME2 <- function(featureTableFile, #' #' @examples #' # Read individual files -#' featureTableFile <- system.file("extdata", "table.qza", package = "mia") -#' taxonomyTableFile <- system.file("extdata", "taxonomy.qza", package = "mia") -#' sampleMetaFile <- system.file("extdata", "sample-metadata.tsv", package = "mia") +#' assay.file <- system.file("extdata", "table.qza", package = "mia") +#' row.file <- system.file("extdata", "taxonomy.qza", package = "mia") +#' col.file <- system.file("extdata", "sample-metadata.tsv", package = "mia") #' -#' assay <- importQZA(featureTableFile) -#' rowdata <- importQZA(taxonomyTableFile, removeTaxaPrefixes = TRUE) -#' coldata <- read.table(sampleMetaFile, header = TRUE, sep = "\t", comment.char = "") +#' assay <- importQZA(assay.file) +#' rowdata <- importQZA(row.file, prefix.rm = TRUE) +#' coldata <- read.table(col.file, header = TRUE, sep = "\t", comment.char = "") #' #' # Assign rownames #' rownames(coldata) <- coldata[, 1] @@ -202,7 +223,7 @@ importQIIME2 <- function(featureTableFile, #' @importFrom utils unzip #' @importFrom ape read.tree #' @importFrom Biostrings readDNAStringSet -importQZA <- function(file, temp = tempdir(), ...) { +importQZA <- function(file, temp.dir = temp, temp = tempdir(), ...) { if (!file.exists(file)) { stop(file, " does not exist", call. = FALSE) } @@ -211,7 +232,7 @@ importQZA <- function(file, temp = tempdir(), ...) { call. = FALSE) } - unzipped_file <- unzip(file, exdir = temp) + unzipped_file <- unzip(file, exdir = temp.dir) on.exit(unlink(c(unzipped_file,unique(dirname(unzipped_file))), recursive = TRUE)) meta_file <- grep("metadata.yaml", unzipped_file, value = TRUE) @@ -232,7 +253,7 @@ importQZA <- function(file, temp = tempdir(), ...) 
{ "BIOMV", "TSVTaxonomyDirectoryFormat", "NewickDirectoryFormat", "DNASequencesDirectoryFormat" ) - file <- file.path(temp, uuid, "data", format_files[match(format, formats)]) + file <- file.path(temp.dir, uuid, "data", format_files[match(format, formats)]) res <- switch ( format, diff --git a/R/makeTreeSummarizedExperimentFromBiom.R b/R/makeTreeSummarizedExperimentFromBiom.R index 33116fb46..0f385ae33 100644 --- a/R/makeTreeSummarizedExperimentFromBiom.R +++ b/R/makeTreeSummarizedExperimentFromBiom.R @@ -6,18 +6,24 @@ #' #' @param file biom file location #' -#' @param removeTaxaPrefixes \code{TRUE} or \code{FALSE}: Should +#' @param prefix.rm \code{TRUE} or \code{FALSE}: Should #' taxonomic prefixes be removed? The prefixes is removed only from detected -#' taxa columns meaning that \code{rankFromPrefix} should be enabled in the most cases. -#' (default \code{removeTaxaPrefixes = FALSE}) +#' taxa columns meaning that \code{rank.from.prefix} should be enabled in the most cases. +#' (default \code{prefix.rm = FALSE}) #' -#' @param rankFromPrefix \code{TRUE} or \code{FALSE}: If file does not have +#' @param removeTaxaPrefixes Deprecated. Use \code{prefix.rm} instead. +#' +#' @param rank.from.prefix \code{TRUE} or \code{FALSE}: If file does not have #' taxonomic ranks on feature table, should they be scraped from prefixes? -#' (default \code{rankFromPrefix = FALSE}) +#' (default \code{rank.from.prefix = FALSE}) +#' +#' @param rankFromPrefix Deprecated. Use \code{rank.from.prefix} instead. #' -#' @param remove.artifacts \code{TRUE} or \code{FALSE}: If file have +#' @param artifact.rm \code{TRUE} or \code{FALSE}: If file have #' some taxonomic character naming artifacts, should they be removed. -#' (default \code{remove.artifacts = FALSE}) +#' (default \code{artifact.rm = FALSE}) +#' +#' @param remove.artifacts Deprecated. Use \code{artifact.rm} instead. #' #' @param ...
additional arguments #' \itemize{ @@ -51,8 +57,8 @@ #' #' # Get taxonomyRanks from prefixes and remove prefixes #' tse <- importBIOM(biom_file, -#' rankFromPrefix = TRUE, -#' removeTaxaPrefixes = TRUE) +#' rank.from.prefix = TRUE, +#' prefix.rm = TRUE) #' #' # Load another biom file #' biom_file <- system.file("extdata/testdata", "Aggregated_humanization2.biom", @@ -60,7 +66,7 @@ #' #' # Clean artifacts from taxonomic data #' tse <- importBIOM(biom_file, -#' remove.artifacts = TRUE) +#' artifact.rm = TRUE) NULL #' @rdname makeTreeSEFromBiom @@ -74,32 +80,34 @@ importBIOM <- function(file, ...) { #' @rdname makeTreeSEFromBiom #' -#' @param obj object of type \code{\link[biomformat:read_biom]{biom}} +#' @param x object of type \code{\link[biomformat:read_biom]{biom}} #' #' @export #' @importFrom S4Vectors make_zero_col_DFrame DataFrame #' @importFrom dplyr %>% bind_rows makeTreeSEFromBiom <- function( - obj, removeTaxaPrefixes = FALSE, rankFromPrefix = FALSE, - remove.artifacts = FALSE, ...){ + x, prefix.rm = removeTaxaPrefixes, + removeTaxaPrefixes = FALSE, rank.from.prefix = rankFromPrefix, + rankFromPrefix = FALSE, + artifact.rm = remove.artifacts, remove.artifacts = FALSE, ...){ # input check .require_package("biomformat") - if(!is(obj,"biom")){ - stop("'obj' must be a 'biom' object", call. = FALSE) + if(!is(x,"biom")){ + stop("'x' must be a 'biom' object", call. = FALSE) } - if( !.is_a_bool(removeTaxaPrefixes) ){ - stop("'removeTaxaPrefixes' must be TRUE or FALSE.", call. = FALSE) + if( !.is_a_bool(prefix.rm) ){ + stop("'prefix.rm' must be TRUE or FALSE.", call. = FALSE) } - if( !.is_a_bool(rankFromPrefix) ){ - stop("'rankFromPrefix' must be TRUE or FALSE.", call. = FALSE) + if( !.is_a_bool(rank.from.prefix) ){ + stop("'rank.from.prefix' must be TRUE or FALSE.", call. = FALSE) } - if( !.is_a_bool(remove.artifacts) ){ - stop("'remove.artifacts' must be TRUE or FALSE.", call. 
= FALSE) + if( !.is_a_bool(artifact.rm) ){ + stop("'artifact.rm' must be TRUE or FALSE.", call. = FALSE) } # - counts <- as(biomformat::biom_data(obj), "matrix") - sample_data <- biomformat::sample_metadata(obj) - feature_data <- biomformat::observation_metadata(obj) + counts <- as(biomformat::biom_data(x), "matrix") + sample_data <- biomformat::sample_metadata(x) + feature_data <- biomformat::observation_metadata(x) # colData is initialized with empty tables with rownames if it is NULL if( is.null(sample_data) ){ @@ -156,12 +164,12 @@ makeTreeSEFromBiom <- function( } # Clean feature_data from possible character artifacts if specified - if( remove.artifacts ){ + if( artifact.rm ){ feature_data <- .detect_taxa_artifacts_and_clean(feature_data, ...) } # Replace taxonomy ranks with ranks found based on prefixes - if( rankFromPrefix && all( + if( rank.from.prefix && all( unlist(lapply(colnames(feature_data), function(x) !x %in% TAXONOMY_RANKS)))){ # Find ranks @@ -172,13 +180,13 @@ makeTreeSEFromBiom <- function( } # Remove prefixes if specified and rowData includes info - if(removeTaxaPrefixes && ncol(feature_data) > 0){ + if(prefix.rm && ncol(feature_data) > 0){ feature_data <- .remove_prefixes_from_taxa(feature_data, ...) 
} # Adjust row and colnames - rownames(counts) <- rownames(feature_data) <- biomformat::rownames(obj) - colnames(counts) <- rownames(sample_data) <- biomformat::colnames(obj) + rownames(counts) <- rownames(feature_data) <- biomformat::rownames(x) + colnames(counts) <- rownames(sample_data) <- biomformat::colnames(x) # Convert into DataFrame sample_data <- DataFrame(sample_data) @@ -195,11 +203,11 @@ makeTreeSEFromBiom <- function( } ####################### makeTreeSummarizedExperimentFromBiom ################### -#' @param obj object of type \code{\link[biomformat:read_biom]{biom}} +#' @param x object of type \code{\link[biomformat:read_biom]{biom}} #' @rdname makeTreeSEFromBiom #' @export -makeTreeSummarizedExperimentFromBiom <- function(obj, ...){ - makeTreeSEFromBiom(obj, ...) +makeTreeSummarizedExperimentFromBiom <- function(x, ...){ + makeTreeSEFromBiom(x, ...) } ################################ HELP FUNCTIONS ################################ diff --git a/R/makeTreeSummarizedExperimentFromPhyloseq.R b/R/makeTreeSummarizedExperimentFromPhyloseq.R index 79cad5f1a..15e64b916 100644 --- a/R/makeTreeSummarizedExperimentFromPhyloseq.R +++ b/R/makeTreeSummarizedExperimentFromPhyloseq.R @@ -5,7 +5,7 @@ #' #' All data stored in a \code{phyloseq} object is transferred. 
#' -#' @param obj a \code{phyloseq} object +#' @param x a \code{phyloseq} object #' #' @return An object of class \code{TreeSummarizedExperiment} #' @@ -30,41 +30,41 @@ #' data(esophagus, package="phyloseq") #' makeTreeSEFromPhyloseq(esophagus) #' } -makeTreeSEFromPhyloseq <- function(obj) { +makeTreeSEFromPhyloseq <- function(x) { # input check .require_package("phyloseq") - if(!is(obj,"phyloseq")){ - stop("'obj' must be a 'phyloseq' object") + if(!is(x,"phyloseq")){ + stop("'x' must be a 'phyloseq' object") } # # Get the assay - counts <- obj@otu_table@.Data + counts <- x@otu_table@.Data # Check the orientation, and transpose if necessary - if( !obj@otu_table@taxa_are_rows ){ + if( !x@otu_table@taxa_are_rows ){ counts <- t(counts) } # Create a list of assays assays <- SimpleList(counts = counts) - if(!is.null(obj@tax_table@.Data)){ - rowData <- DataFrame(data.frame(obj@tax_table@.Data)) + if(!is.null(x@tax_table@.Data)){ + rowData <- DataFrame(data.frame(x@tax_table@.Data)) } else{ rowData <- S4Vectors::make_zero_col_DFrame(nrow(assays$counts)) rownames(rowData) <- rownames(assays$counts) } - if(!is.null(obj@sam_data)){ - colData <- DataFrame(data.frame(obj@sam_data)) + if(!is.null(x@sam_data)){ + colData <- DataFrame(data.frame(x@sam_data)) } else{ colData <- S4Vectors::make_zero_col_DFrame(ncol(assays$counts)) rownames(colData) <- colnames(assays$counts) } - if(!is.null(obj@phy_tree)){ - rowTree <- obj@phy_tree + if(!is.null(x@phy_tree)){ + rowTree <- x@phy_tree } else { rowTree <- NULL } - if (!is.null(obj@refseq)) { - referenceSeq <- obj@refseq + if (!is.null(x@refseq)) { + referenceSeq <- x@refseq } else { referenceSeq <- NULL } @@ -78,13 +78,13 @@ makeTreeSEFromPhyloseq <- function(obj) { ####################### makeTreeSummarizedExperimentFromPhyloseq ####################### #' @rdname makeTreeSEFromPhyloseq #' @export -setGeneric("makeTreeSummarizedExperimentFromPhyloseq", signature = c("obj"), - function(obj) 
+setGeneric("makeTreeSummarizedExperimentFromPhyloseq", signature = c("x"), + function(x) standardGeneric("makeTreeSummarizedExperimentFromPhyloseq")) #' @rdname makeTreeSEFromPhyloseq #' @export -setMethod("makeTreeSummarizedExperimentFromPhyloseq", signature = c(obj = "ANY"), - function(obj){ - makeTreeSEFromPhyloseq(obj) +setMethod("makeTreeSummarizedExperimentFromPhyloseq", signature = c(x = "ANY"), + function(x){ + makeTreeSEFromPhyloseq(x) }) diff --git a/R/makephyloseqFromTreeSummarizedExperiment.R b/R/makephyloseqFromTreeSummarizedExperiment.R index 30a45618d..908a7052b 100644 --- a/R/makephyloseqFromTreeSummarizedExperiment.R +++ b/R/makephyloseqFromTreeSummarizedExperiment.R @@ -16,9 +16,11 @@ #' (Please use \code{assay.type} instead. At some point \code{assay_name} #' will be disabled.) #' -#' @param tree_name a single \code{character} value for specifying which +#' @param tree.name a single \code{character} value for specifying which #' tree will be included in the phyloseq object that is created, -#' (By default: \code{tree_name = "phylo"}) +#' (By default: \code{tree.name = "phylo"}) +#' +#' @param tree_name Deprecated. Use \code{tree.name} instead. #' #' @param ... 
additional arguments #' @@ -124,12 +126,12 @@ setMethod("makePhyloseqFromTreeSE", #' @export setMethod("makePhyloseqFromTreeSE", signature = c(x = "TreeSummarizedExperiment"), - function(x, tree_name = "phylo", ...){ - # If rowTrees exist, check tree_name + function(x, tree.name = tree_name, tree_name = "phylo", ...){ + # If rowTrees exist, check tree.name if( length(x@rowTree) > 0 ){ - .check_rowTree_present(tree_name, x) + .check_rowTree_present(tree.name, x) # Subset the data based on the tree - x <- x[ rowLinks(x)$whichTree == tree_name, ] + x <- x[ rowLinks(x)$whichTree == tree.name, ] add_phy_tree <- TRUE } else{ add_phy_tree <- FALSE @@ -157,7 +159,7 @@ setMethod("makePhyloseqFromTreeSE", # Add phylogenetic tree if( add_phy_tree ){ - phy_tree <- .get_rowTree_for_phyloseq(x, tree_name) + phy_tree <- .get_rowTree_for_phyloseq(x, tree.name) # If the object is a phyloseq object, adds phy_tree to it if(is(obj,"phyloseq")){ phyloseq::phy_tree(obj) <- phy_tree @@ -213,7 +215,7 @@ setMethod("makePhyloseqFromTreeSummarizedExperiment", signature = c(x = "ANY"), ################################ HELP FUNCTIONS ################################ # If tips do not match with rownames, prune the tree -.get_x_with_pruned_tree <- function(x, tree_name){ +.get_x_with_pruned_tree <- function(x, tree.name){ # Get rowLinks row_links <- rowLinks(x) # Gets node labels @@ -229,15 +231,15 @@ setMethod("makePhyloseqFromTreeSummarizedExperiment", signature = c(x = "ANY"), } # In phyloseq, tips and rownames must match -.get_rowTree_for_phyloseq <- function(x, tree_name){ +.get_rowTree_for_phyloseq <- function(x, tree.name){ # Check if the rowTree's tips match with rownames: # tips labels are found from rownames - if( any(!( rowTree(x, tree_name)$tip.label) %in% rownames(x)) ){ + if( any(!( rowTree(x, tree.name)$tip.label) %in% rownames(x)) ){ # If rowtree do not match, tree is pruned - x <- .get_x_with_pruned_tree(x, tree_name) + x <- .get_x_with_pruned_tree(x, tree.name) } # Get rowTree 
- phy_tree <- rowTree(x, tree_name) + phy_tree <- rowTree(x, tree.name) # Convert rowTree to phyloseq object phy_tree <- phyloseq::phy_tree(phy_tree) @@ -274,4 +276,4 @@ setMethod("makePhyloseqFromTreeSummarizedExperiment", signature = c(x = "ANY"), refSeqs <- NULL } return(refSeqs) -} +} \ No newline at end of file diff --git a/R/meltAssay.R b/R/meltAssay.R index e98b15357..172a97d9d 100644 --- a/R/meltAssay.R +++ b/R/meltAssay.R @@ -22,27 +22,35 @@ #' (Please use \code{assay.type} instead. At some point \code{assay_name} #' will be disabled.) #' -#' @param add_col_data \code{NULL}, \code{TRUE} or a \code{character} vector to +#' @param add.col \code{NULL}, \code{TRUE} or a \code{character} vector to #' select information from the \code{colData} to add to the molten assay data. -#' If \code{add_col_data = NULL} no data will be added, if -#' \code{add_col_data = TRUE} all data will be added and if -#' \code{add_col_data} is a \code{character} vector, it will be used to subset +#' If \code{add.col = NULL} no data will be added, if +#' \code{add.col = TRUE} all data will be added and if +#' \code{add.col} is a \code{character} vector, it will be used to subset #' to given column names in \code{colData}. (default: -#' \code{add_col_data = NULL}) +#' \code{add.col = NULL}) +#' +#' @param add_col_data Deprecated. Use \code{add.col} instead. #' -#' @param add_row_data \code{NULL}, \code{TRUE} or a \code{character} vector to +#' @param add.row \code{NULL}, \code{TRUE} or a \code{character} vector to #' select information from the \code{rowData} to add to the molten assay data. 
-#' If \code{add_row_data = NULL} no data will be added, if -#' \code{add_row_data = TRUE} all data will be added and if -#' \code{add_row_data} is a \code{character} vector, it will be used to subset +#' If \code{add.row = NULL} no data will be added, if +#' \code{add.row = TRUE} all data will be added and if +#' \code{add.row} is a \code{character} vector, it will be used to subset #' to given column names in \code{rowData}. (default: -#' \code{add_row_data = NULL}) +#' \code{add.row = NULL}) +#' +#' @param add_row_data Deprecated. Use \code{add.row} instead. #' -#' @param feature_name a \code{character} scalar to use as the output's name -#' for the feature identifier. (default: \code{feature_name = "FeatureID"}) +#' @param row.name a \code{character} scalar to use as the output's name +#' for the feature identifier. (default: \code{row.name = "FeatureID"}) +#' +#' @param feature_name Deprecated. Use \code{row.name} instead. #' -#' @param sample_name a \code{character} scalar to use as the output's name -#' for the sample identifier. (default: \code{sample_name = "SampleID"}) +#' @param col.name a \code{character} scalar to use as the output's name +#' for the sample identifier. (default: \code{col.name = "SampleID"}) +#' +#' @param sample_name Deprecated. Use \code{col.name} instead. #' #' @param ... optional arguments: #' \itemize{ @@ -66,8 +74,8 @@ #' data(GlobalPatterns) #' molten_tse <- meltSE(GlobalPatterns, #' assay.type = "counts", -#' add_row_data = TRUE, -#' add_col_data = TRUE +#' add.row = TRUE, +#' add.col = TRUE #' ) #' molten_tse NULL @@ -78,56 +86,60 @@ setGeneric("meltSE", signature = "x", function(x, assay.type = assay_name, assay_name = "counts", + add.row = add_row_data, add_row_data = NULL, + add.col = add_col_data, add_col_data = NULL, + row.name = feature_name, feature_name = "FeatureID", + col.name = sample_name, sample_name = "SampleID", ...) 
standardGeneric("meltSE") ) -.norm_add_row_data <- function(add_row_data, x, feature_name){ - if(is.null(add_row_data)){ +.norm_add_row_data <- function(add.row, x, row.name){ + if(is.null(add.row)){ return(NULL) } - if(anyNA(add_row_data)){ - stop("'add_row_data' contains NA.", call. = FALSE) + if(anyNA(add.row)){ + stop("'add.row' contains NA.", call. = FALSE) } cn <- colnames(rowData(x)) - if(is.logical(add_row_data) && length(add_row_data) == 1L && add_row_data){ - add_row_data <- cn - } else if (isFALSE(all(add_row_data %in% cn))) { - stop("Please provide valid column names with 'add_row_data' matching ", + if(is.logical(add.row) && length(add.row) == 1L && add.row){ + add.row <- cn + } else if (isFALSE(all(add.row %in% cn))) { + stop("Please provide valid column names with 'add.row' matching ", "those in 'rowData(x)'", call. = FALSE) } - if(!is.null(rownames(x)) && feature_name %in% add_row_data){ - warning("'x' contains a column '",feature_name,"' in its ", + if(!is.null(rownames(x)) && row.name %in% add.row){ + warning("'x' contains a column '",row.name,"' in its ", "rowData(), which will ", - "be renamed to '",feature_name,"_row'", call. = FALSE) + "be renamed to '",row.name,"_row'", call. = FALSE) } - add_row_data + return(add.row) } -.norm_add_col_data <- function(add_col_data, x, sample_name){ - if(is.null(add_col_data)){ +.norm_add_col_data <- function(add.col, x, col.name){ + if(is.null(add.col)){ return(NULL) } - if(anyNA(add_col_data)){ - stop("'add_col_data' contains NA.", call. = FALSE) + if(anyNA(add.col)){ + stop("'add.col' contains NA.", call. 
= FALSE) } cn <- colnames(colData(x)) - if(is.logical(add_col_data) && length(add_col_data) == 1L && add_col_data){ - add_col_data <- cn - } else if (isFALSE(all(add_col_data %in% cn))) { - stop("Please provide valid column names with 'add_col_data' matching ", + if(is.logical(add.col) && length(add.col) == 1L && add.col){ + add.col <- cn + } else if (isFALSE(all(add.col %in% cn))) { + stop("Please provide valid column names with 'add.col' matching ", "those in 'colData(x)'", call. = FALSE) } - if(!is.null(colnames(x)) && sample_name %in% add_col_data){ - warning("'x' contains a column '",sample_name,"' in its ", + if(!is.null(colnames(x)) && col.name %in% add.col){ + warning("'x' contains a column '",col.name,"' in its ", "colData(), which will ", - "be renamed to '",sample_name,"_col'", call. = FALSE) + "be renamed to '",col.name,"_col'", call. = FALSE) } - add_col_data + return(add.col) } .col_switch_name <- function(name){ @@ -140,27 +152,27 @@ setGeneric("meltSE", #' @importFrom dplyr mutate select .format_molten_assay <- function(molten_assay, x, - feature_name, - sample_name){ + row.name, + col.name){ if(is.null(rownames(x)) && - .row_switch_name(feature_name) %in% colnames(molten_assay) && - !anyNA(molten_assay[,.row_switch_name(feature_name)]) && - !anyDuplicated(rowData(x)[,feature_name])){ + .row_switch_name(row.name) %in% colnames(molten_assay) && + !anyNA(molten_assay[,.row_switch_name(row.name)]) && + !anyDuplicated(rowData(x)[,row.name])){ molten_assay <- molten_assay %>% - select(!sym(feature_name)) %>% - dplyr::rename(!!sym(feature_name) := !!sym(.row_switch_name(feature_name))) + select(!sym(row.name)) %>% + dplyr::rename(!!sym(row.name) := !!sym(.row_switch_name(row.name))) } if(is.null(colnames(x)) && - .col_switch_name(sample_name) %in% colnames(molten_assay) && - !anyNA(molten_assay[,.col_switch_name(sample_name)]) && - !anyDuplicated(colData(x)[,sample_name])){ + .col_switch_name(col.name) %in% colnames(molten_assay) && + 
!anyNA(molten_assay[,.col_switch_name(col.name)]) && + !anyDuplicated(colData(x)[,col.name])){ molten_assay %>% - select(!sym(sample_name)) %>% - dplyr::rename(!!sym(sample_name) := !!sym(.col_switch_name(sample_name))) + select(!sym(col.name)) %>% + dplyr::rename(!!sym(col.name) := !!sym(.col_switch_name(col.name))) } molten_assay %>% - mutate(!!sym(feature_name) := factor(!!sym(feature_name)), - !!sym(sample_name) := factor(!!sym(sample_name))) + mutate(!!sym(row.name) := factor(!!sym(row.name)), + !!sym(col.name) := factor(!!sym(col.name))) } @@ -169,20 +181,22 @@ setGeneric("meltSE", #' @export setMethod("meltSE", signature = c(x = "SummarizedExperiment"), function(x, - assay.type = assay_name, assay_name = "counts", - add_row_data = NULL, - add_col_data = NULL, - feature_name = "FeatureID", - sample_name = "SampleID", - ...) { + assay.type = assay_name, assay_name = "counts", + add.row = NULL, + add.col = NULL, + row.name = feature_name, + feature_name = "FeatureID", + col.name = sample_name, + sample_name = "SampleID", + ...) { # input check .check_assay_present(assay.type, x) - if(!.is_a_string(feature_name)){ - stop("'feature_name' must be a single non-empty character value.", + if(!.is_a_string(row.name)){ + stop("'row.name' must be a single non-empty character value.", call. = FALSE) } - if(!.is_a_string(sample_name)){ - stop("'sample_name' must be a single non-empty character value.", + if(!.is_a_string(col.name)){ + stop("'col.name' must be a single non-empty character value.", call. = FALSE) } # check if rownames are duplicated, and if they are, modify @@ -193,20 +207,20 @@ setMethod("meltSE", signature = c(x = "SummarizedExperiment"), call. = FALSE) } # check selected colnames - add_row_data <- .norm_add_row_data(add_row_data, x, feature_name) - add_col_data <- .norm_add_col_data(add_col_data, x, sample_name) - molten_assay <- .melt_assay(x, assay.type, feature_name, sample_name, ...) 
- if(!is.null(add_row_data)){ + add.row <- .norm_add_row_data(add.row, x, row.name) + add.col <- .norm_add_col_data(add.col, x, col.name) + molten_assay <- .melt_assay(x, assay.type, row.name, col.name, ...) + if(!is.null(add.row)){ molten_assay <- - .add_row_data_to_molten_assay(molten_assay, x, add_row_data, - feature_name) + .add_row_data_to_molten_assay(molten_assay, x, add.row, + row.name) } - if(!is.null(add_col_data)){ + if(!is.null(add.col)){ molten_assay <- - .add_col_data_to_molten_assay(molten_assay, x, add_col_data, - sample_name, ...) + .add_col_data_to_molten_assay(molten_assay, x, add.col, + col.name, ...) } - .format_molten_assay(molten_assay, x, feature_name, sample_name) + .format_molten_assay(molten_assay, x, row.name, col.name) } ) @@ -215,18 +229,19 @@ setMethod("meltSE", signature = c(x = "SummarizedExperiment"), #' @importFrom tibble rownames_to_column #' @importFrom tidyr pivot_longer #' @importFrom rlang sym -.melt_assay <- function(x, assay.type, feature_name, sample_name, check_names = FALSE,...) { +.melt_assay <- function(x, assay.type, row.name, col.name, + check.names = check_names, check_names = FALSE,...) { mat <- assay(x, assay.type) %>% as.matrix() rownames(mat) <- rownames(x) colnames(mat) <- colnames(x) mat %>% - data.frame(check.names = check_names) %>% - rownames_to_column(feature_name) %>% + data.frame(check.names = check.names) %>% + rownames_to_column(row.name) %>% # SampleID is unique sample id - pivot_longer(!sym(feature_name), + pivot_longer(!sym(row.name), values_to = assay.type, - names_to = sample_name) + names_to = col.name) } # Combines molten assay with rowData i.e. taxonomy table. 
@@ -234,18 +249,18 @@ setMethod("meltSE", signature = c(x = "SummarizedExperiment"), #' @importFrom rlang sym #' @importFrom tibble rownames_to_column #' @importFrom dplyr rename -.add_row_data_to_molten_assay <- function(molten_assay, x, add_row_data, - feature_name) { - rd <- SummarizedExperiment::rowData(x)[,add_row_data,drop=FALSE] %>% +.add_row_data_to_molten_assay <- function(molten_assay, x, add.row, + row.name) { + rd <- SummarizedExperiment::rowData(x)[,add.row,drop=FALSE] %>% data.frame() - if(feature_name %in% add_row_data){ + if(row.name %in% add.row){ rd <- rd %>% - dplyr::rename(!!sym(.row_switch_name(feature_name)) := !!sym(feature_name)) + dplyr::rename(!!sym(.row_switch_name(row.name)) := !!sym(row.name)) } rd <- rd %>% - rownames_to_column(feature_name) + rownames_to_column(row.name) molten_assay %>% - dplyr::left_join(rd, by = feature_name) + dplyr::left_join(rd, by = row.name) } # Combines molten assay and rowData i.e. taxonomy table with @@ -253,20 +268,20 @@ setMethod("meltSE", signature = c(x = "SummarizedExperiment"), #' @importFrom rlang sym #' @importFrom tibble rownames_to_column #' @importFrom dplyr rename -.add_col_data_to_molten_assay <- function(molten_assay, x, add_col_data, - sample_name, check_names = FALSE,...) { - cd <- SummarizedExperiment::colData(x)[,add_col_data,drop=FALSE] %>% +.add_col_data_to_molten_assay <- function(molten_assay, x, add.col, + col.name, check.names = check_names, check_names = FALSE,...) 
{ + cd <- SummarizedExperiment::colData(x)[,add.col,drop=FALSE] %>% data.frame() # This makes sure that sample names match - if(check_names == TRUE){ + if(check.names){ rownames(cd) <- make.names(rownames(cd)) } - if(sample_name %in% add_col_data){ + if(col.name %in% add.col){ cd <- cd %>% - dplyr::rename(!!sym(.col_switch_name(sample_name)) := !!sym(sample_name)) + dplyr::rename(!!sym(.col_switch_name(col.name)) := !!sym(col.name)) } cd <- cd %>% - rownames_to_column(sample_name) + rownames_to_column(col.name) molten_assay %>% - dplyr::left_join(cd, by = sample_name) + dplyr::left_join(cd, by = col.name) } diff --git a/R/merge.R b/R/merge.R index 8a960a9d8..f6112f511 100644 --- a/R/merge.R +++ b/R/merge.R @@ -188,24 +188,24 @@ seq } -.merge_rows_TSE <- function(x, f, archetype = 1L, mergeTree = FALSE, - mergeRefSeq = FALSE, ...){ +.merge_rows_TSE <- function(x, f, archetype = 1L, update.tree = FALSE, + update.refseq = mergeRefSeq, mergeRefSeq = FALSE, ...){ # input check - if(!.is_a_bool(mergeTree)){ - stop("'mergeTree' must be TRUE or FALSE.", call. = FALSE) + if(!.is_a_bool(update.tree)){ + stop("'update.tree' must be TRUE or FALSE.", call. = FALSE) } - if(!.is_a_bool(mergeRefSeq)){ - stop("'mergeRefSeq' must be TRUE or FALSE.", call. = FALSE) + if(!.is_a_bool(update.refseq)){ + stop("'update.refseq' must be TRUE or FALSE.", call. = FALSE) } # for optionally merging referenceSeq refSeq <- NULL - if(mergeRefSeq){ + if(update.refseq){ refSeq <- referenceSeq(x) } # x <- .merge_rows(x, f, archetype = 1L, ...) # optionally merge rowTree - if( mergeTree ){ + if( update.tree ){ x <- .agglomerate_trees(x, 1) } # optionally merge referenceSeq @@ -215,15 +215,15 @@ x } -.merge_cols_TSE <- function(x, f, archetype = 1L, mergeTree = FALSE, ...){ +.merge_cols_TSE <- function(x, f, archetype = 1L, update.tree = FALSE, ...){ # input check - if(!.is_a_bool(mergeTree)){ - stop("'mergeTree' must be TRUE or FALSE.", call. 
= FALSE) + if(!.is_a_bool(update.tree)){ + stop("'update.tree' must be TRUE or FALSE.", call. = FALSE) } # x <- .merge_cols(x, f, archetype = 1L, ...) # optionally merge colTree - if( mergeTree ){ + if( update.tree ){ x <- .agglomerate_trees(x, 2) } return(x) diff --git a/R/mergeSEs.R b/R/mergeSEs.R index 134c7ce2c..02a2bc50f 100644 --- a/R/mergeSEs.R +++ b/R/mergeSEs.R @@ -16,20 +16,26 @@ #' Must be 'full', 'inner', 'left', or 'right'. 'left' and 'right' are disabled #' when more than two objects are being merged. (By default: \code{join = "full"}) #' -#' @param missing_values NA, 0, or a single character values specifying the notation -#' of missing values. (By default: \code{missing_values = NA}) +#' @param missing.values NA, 0, or a single character values specifying the notation +#' of missing values. (By default: \code{missing.values = NA}) #' -#' @param collapse_samples A boolean value for selecting whether to collapse identically -#' named samples to one. (By default: \code{collapse_samples = FALSE}) +#' @param missing_values Deprecated. Use \code{missing.values} instead. #' -#' @param collapse_features A boolean value for selecting whether to collapse identically +#' @param collapse.cols A boolean value for selecting whether to collapse identically +#' named samples to one. (By default: \code{collapse.cols = FALSE}) +#' +#' @param collapse_samples Deprecated. Use \code{collapse.cols} instead. +#' +#' @param collapse.rows A boolean value for selecting whether to collapse identically #' named features to one. Since all taxonomy information is taken into account, #' this concerns rownames-level (usually strain level) comparison. Often #' OTU or ASV level is just an arbitrary number series from sequencing machine #' meaning that the OTU information is not comparable between studies. With this #' option, it is possible to specify whether these strains are combined if their #' taxonomy information along with OTU number matches. 
-#' (By default: \code{collapse_features = TRUE}) +#' (By default: \code{collapse.rows = TRUE}) +#' +#' @param collapse_features Deprecated. Use \code{collapse.rows} instead. #' #' @param verbose A single boolean value to choose whether to show messages. #' (By default: \code{verbose = TRUE}) @@ -49,12 +55,12 @@ #' matching based on \code{rowData} is not done. For samples, collapsing #' is disabled by default meaning that equally named samples that are stored #' in different objects are interpreted as unique. Collapsing can be enabled -#' with \code{collapse_samples = TRUE} when equally named samples describe the same +#' with \code{collapse.cols = TRUE} when equally named samples describe the same #' sample. #' #' If, for example, all rows are not shared with #' individual objects, there are missing values in \code{assays}. The notation of missing -#' can be specified with the \code{missing_values} argument. If input consists of +#' can be specified with the \code{missing.values} argument. If input consists of #' \code{TreeSummarizedExperiment} objects, also \code{rowTree}, \code{colTree}, and #' \code{referenceSeq} are preserved if possible. The data is preserved if #' all the rows or columns can be found from it. @@ -110,7 +116,7 @@ #' #' # Merge a list of TreeSEs #' list <- SimpleList(tse1, tse2, tse3) -#' tse <- mergeSEs(list, assay.type = "counts", missing_values = 0) +#' tse <- mergeSEs(list, assay.type = "counts", missing.values = 0) #' tse #' #' # With 'join', it is possible to specify the merging method. 
Subsets are used @@ -119,9 +125,9 @@ #' tse_temp #' #' # If your objects contain samples that describe one and same sample, -#' # you can collapse equally named samples to one by specifying 'collapse_samples' +#' # you can collapse equally named samples to one by specifying 'collapse.cols' #' tse_temp <- mergeSEs(list(tse[1:10, 1], tse[1:20, 1], tse[1:5, 1]), -#' collapse_samples = TRUE, +#' collapse.cols = TRUE, #' join = "inner") #' tse_temp #' @@ -147,9 +153,10 @@ setGeneric("mergeSEs", signature = c("x"), #' @rdname mergeSEs #' @export setMethod("mergeSEs", signature = c(x = "SimpleList"), - function(x, assay.type="counts", assay_name = NULL, join = "full", - missing_values = NA, collapse_samples = FALSE, - collapse_features = TRUE, verbose = TRUE, + function(x, assay.type="counts", assay_name = NULL, join = "full", + missing.values = missing_values, missing_values = NA, + collapse.cols = collapse_samples, collapse_samples = FALSE, + collapse.rows = collapse_features, collapse_features = TRUE, verbose = TRUE, ... ){ ################## Input check ################## # Check the objects @@ -183,23 +190,23 @@ setMethod("mergeSEs", signature = c(x = "SimpleList"), "when more than two objects are being merged.", call. = FALSE) } - # Is missing_values one of the allowed ones - missing_values_bool <- length(missing_values) == 1L && - (is.numeric(missing_values) && missing_values == 0) || - .is_a_string(missing_values) || is.na(missing_values) + # Is missing.values one of the allowed ones + missing_values_bool <- length(missing.values) == 1L && + (is.numeric(missing.values) && missing.values == 0) || + .is_a_string(missing.values) || is.na(missing.values) # If not then give error if( !missing_values_bool ){ - stop("'missing_values' must be 0, NA, or a single character value.", + stop("'missing.values' must be 0, NA, or a single character value.", call. 
= FALSE) } - # Check collapse_samples - if( !.is_a_bool(collapse_samples) ){ - stop("'collapse_samples' must be TRUE or FALSE.", + # Check collapse.cols + if( !.is_a_bool(collapse.cols) ){ + stop("'collapse.cols' must be TRUE or FALSE.", call. = FALSE) } - # Check collapse_samples - if( !.is_a_bool(collapse_features) ){ - stop("'collapse_features' must be TRUE or FALSE.", + # Check collapse.cols + if( !.is_a_bool(collapse.rows) ){ + stop("'collapse.rows' must be TRUE or FALSE.", call. = FALSE) } # Check verbose @@ -216,8 +223,8 @@ setMethod("mergeSEs", signature = c(x = "SimpleList"), } # Merge objects tse <- .merge_SEs( - x, class, join, assay.type, missing_values, collapse_samples, - collapse_features, verbose) + x, class, join, assay.type, missing.values, collapse.cols, + collapse.rows, verbose) return(tse) } ) @@ -265,8 +272,8 @@ setMethod("mergeSEs", signature = c(x = "list"), #' @importFrom SingleCellExperiment SingleCellExperiment .merge_SEs <- function( - x, class, join, assay.type, missing_values, collapse_samples, - collapse_features, verbose){ + x, class, join, assay.type, missing.values, collapse.cols, + collapse.rows, verbose){ # Take first element and remove it from the list tse <- x[[1]] @@ -312,10 +319,10 @@ setMethod("mergeSEs", signature = c(x = "list"), temp <- .add_rowdata_to_rownames(temp, rownames_name = rownames_name) # Modify names if specified - if( !collapse_samples ){ + if( !collapse.cols ){ temp <- .get_unique_names(tse, temp, "col") } - if( !collapse_features ){ + if( !collapse.rows ){ temp <- .get_unique_names(tse, temp, "row") } # Merge data @@ -324,7 +331,7 @@ setMethod("mergeSEs", signature = c(x = "list"), tse2 = temp, join = join, assay.type = assay.type, - missing_values = missing_values + missing.values = missing.values ) # If class is TreeSE, get trees and links, and reference sequences if( class == "TreeSummarizedExperiment" ){ @@ -841,7 +848,7 @@ setMethod("mergeSEs", signature = c(x = "list"), # Input: Two SEs # Output: A 
list of arguments .merge_SummarizedExperiments <- function(tse1, tse2, join, - assay.type, missing_values){ + assay.type, missing.values){ # Merge rowData rowdata <- .merge_rowdata(tse1, tse2, join) # Merge colData @@ -849,7 +856,7 @@ setMethod("mergeSEs", signature = c(x = "list"), # Merge assays assays <- lapply(assay.type, .merge_assay, tse1 = tse1, tse2 = tse2, - join = join, missing_values = missing_values, + join = join, missing.values = missing.values, rd = rowdata, cd = coldata) assays <- SimpleList(assays) names(assays) <- assay.type @@ -871,7 +878,7 @@ setMethod("mergeSEs", signature = c(x = "list"), # missing values, merged rowData, and merged colData # Output: Merged assay .merge_assay <- function(tse1, tse2, assay.type, join, - missing_values, rd, cd){ + missing.values, rd, cd){ # Take assays assay1 <- assay(tse1, assay.type) assay2 <- assay(tse2, assay.type) @@ -883,7 +890,7 @@ setMethod("mergeSEs", signature = c(x = "list"), assay <- as.matrix(assay) # Fill missing values - assay[ is.na(assay) ] <- missing_values + assay[ is.na(assay) ] <- missing.values # Order the assay based on rowData and colData assay <- assay[ match(rownames(rd), rownames(assay)), , drop = FALSE ] diff --git a/R/rarefyAssay.R b/R/rarefyAssay.R index 08a9428d0..7b5437b37 100644 --- a/R/rarefyAssay.R +++ b/R/rarefyAssay.R @@ -27,9 +27,11 @@ #' (Please use \code{assay.type} instead. At some point \code{assay_name} #' will be disabled.) #' -#' @param min_size A single integer value equal to the number of counts being +#' @param sample A single integer value equal to the number of counts being #' simulated this can equal to lowest number of total counts -#' found in a sample or a user specified number. +#' found in a sample or a user specified number. +#' +#' @param min_size Deprecated. Use \code{sample} instead. #' #' @param replace Logical Default is \code{TRUE}. The default is with #' replacement (\code{replace=TRUE}). 
@@ -64,14 +66,14 @@ #' @name rarefyAssay #' #' @examples -#' # When samples in TreeSE are less than specified min_size, they will be removed. +#' # When samples in TreeSE are less than specified sample, they will be removed. #' # If after subsampling features are not present in any of the samples, #' # they will be removed. #' data(GlobalPatterns) #' tse <- GlobalPatterns #' set.seed(123) #' tse.subsampled <- rarefyAssay(tse, -#' min_size = 60000, +#' sample = 60000, #' name = "subsampled" #' ) #' tse.subsampled @@ -84,7 +86,7 @@ NULL #' @export setGeneric("rarefyAssay", signature = c("x"), function(x, assay.type = assay_name, assay_name = "counts", - min_size = min(colSums2(assay(x))), + sample = min_size, min_size = min(colSums2(assay(x))), replace = TRUE, name = "subsampled", verbose = TRUE, ...) standardGeneric("rarefyAssay")) @@ -95,9 +97,9 @@ setGeneric("rarefyAssay", signature = c("x"), #' @export setMethod("rarefyAssay", signature = c(x = "SummarizedExperiment"), function(x, assay.type = assay_name, assay_name = "counts", - min_size = min(colSums2(assay(x))), + sample = min_size, min_size = min(colSums2(assay(x))), replace = TRUE, - name = "subsampled", verbose = TRUE, ...){ + name = "subsampled", verbose = TRUE, ...){ warning("Subsampling/Rarefying may undermine downstream analyses ", "and have unintended consequences. Therefore, make sure ", @@ -129,20 +131,20 @@ setMethod("rarefyAssay", signature = c(x = "SummarizedExperiment"), call. = FALSE) } #set.seed(seed) - # Make sure min_size is of length 1. - if(length(min_size) > 1){ - stop("`min_size` had more than one value. ", + # Make sure sample is of length 1. + if(length(sample) > 1){ + stop("`sample` had more than one value. ", "Specify a single integer value.", call. 
= FALSE) - min_size <- min_size[1] + sample <- sample[1] } - if(!is.numeric(min_size) || - as.integer(min_size) != min_size && min_size <= 0){ - stop("min_size needs to be a positive integer value.", + if(!is.numeric(sample) || + as.integer(sample) != sample && sample <= 0){ + stop("sample needs to be a positive integer value.", call. = FALSE) } # get samples with less than min number of reads - if(min(colSums2(assay(x, assay.type))) < min_size){ - rmsams <- colnames(x)[colSums2(assay(x, assay.type)) < min_size] + if(min(colSums2(assay(x, assay.type))) < sample){ + rmsams <- colnames(x)[colSums2(assay(x, assay.type)) < sample] # Return NULL, if no samples were found after subsampling if( !any(!colnames(x) %in% rmsams) ){ stop("No samples were found after subsampling.", @@ -150,7 +152,7 @@ setMethod("rarefyAssay", signature = c(x = "SummarizedExperiment"), } if(verbose){ message(length(rmsams), " samples removed ", - "because they contained fewer reads than `min_size`.") + "because they contained fewer reads than `sample`.") } # remove sample(s) newtse <- x[, !colnames(x) %in% rmsams] @@ -159,7 +161,7 @@ setMethod("rarefyAssay", signature = c(x = "SummarizedExperiment"), } newassay <- apply(assay(newtse, assay.type), 2, .subsample_assay, - min_size=min_size, replace=replace) + sample=sample, replace=replace) rownames(newassay) <- rownames(newtse) # remove features not present in any samples after subsampling message(paste(length(which(rowSums2(newassay) == 0)), "features", @@ -170,13 +172,13 @@ setMethod("rarefyAssay", signature = c(x = "SummarizedExperiment"), assay(newtse, name, withDimnames=FALSE) <- newassay newtse <- .add_values_to_metadata(newtse, "subsampleCounts_min_size", - min_size) + sample) return(newtse) } ) ## Modified Sub sampling function from phyloseq internals -.subsample_assay <- function(x, min_size, replace){ +.subsample_assay <- function(x, sample, replace){ # Create replacement species vector rarvec <- numeric(length(x)) # Perform the 
sub-sampling. Suppress warnings due to old R compat issue. @@ -200,7 +202,7 @@ setMethod("rarefyAssay", signature = c(x = "SummarizedExperiment"), prob <- NULL } suppressWarnings(subsample <- sample(obsvec, - min_size, + sample, replace = replace, prob = prob)) # Tabulate the results (these are already named by the order in `x`) diff --git a/R/runDPCoA.R b/R/runDPCoA.R index 06f657be3..c40b36883 100644 --- a/R/runDPCoA.R +++ b/R/runDPCoA.R @@ -22,9 +22,11 @@ #' variances to use for dimensionality reduction. Alternatively \code{NULL}, #' if all features should be used. (default: \code{ntop = NULL}) #' -#' @param subset_row Vector specifying the subset of features to use for +#' @param subset.row Vector specifying the subset of features to use for #' dimensionality reduction. This can be a character vector of row names, an #' integer vector of row indices or a logical vector. +#' +#' @param subset_row Deprecated. Use \code{subset.row} instead. #' #' @param scale Logical scalar, should the expression values be standardized? #' @@ -42,9 +44,11 @@ #' (Please use \code{assay.type} instead. At some point \code{assay_name} #' will be disabled.) #' -#' @param tree_name a single \code{character} value for specifying which +#' @param tree.name a single \code{character} value for specifying which #' rowTree will be used in calculation. -#' (By default: \code{tree_name = "phylo"}) +#' (By default: \code{tree.name = "phylo"}) +#' +#' @param tree_name Deprecated. Use \code{tree.name} instead. #' #' @param altexp String or integer scalar specifying an alternative experiment #' containing the input data. @@ -92,8 +96,8 @@ setGeneric("getDPCoA", signature = c("x", "y"), standardGeneric("getDPCoA")) .calculate_dpcoa <- function(x, y, ncomponents = 2, ntop = NULL, - subset_row = NULL, scale = FALSE, - transposed = FALSE, ...) + subset.row = subset_row, subset_row = NULL, scale = FALSE, + transposed = FALSE, ...) 
{ .require_package("ade4") # input check @@ -121,7 +125,7 @@ setGeneric("getDPCoA", signature = c("x", "y"), if(is.null(ntop)){ ntop <- nrow(x) } - x <- .get_mat_for_reddim(x, subset_row = subset_row, ntop = ntop, + x <- .get_mat_for_reddim(x, subset_row = subset.row, ntop = ntop, scale = scale) } y <- y[colnames(x), colnames(x), drop = FALSE] @@ -154,20 +158,20 @@ setMethod("getDPCoA", c("ANY","ANY"), .calculate_dpcoa) #' @rdname runDPCoA setMethod("getDPCoA", signature = c("TreeSummarizedExperiment","missing"), function(x, ..., assay.type = assay_name, assay_name = exprs_values, - exprs_values = "counts", tree_name = "phylo") + exprs_values = "counts", tree.name = tree_name, tree_name = "phylo") { .require_package("ade4") # Check assay.type .check_assay_present(assay.type, x) - # Check tree_name - .check_rowTree_present(tree_name, x) + # Check tree.name + .check_rowTree_present(tree.name, x) # # Get tree - tree <- rowTree(x, tree_name) + tree <- rowTree(x, tree.name) # Select only those features that are in the rowTree - whichTree <- rowLinks(x)[ , "whichTree"] == tree_name + whichTree <- rowLinks(x)[ , "whichTree"] == tree.name if( any(!whichTree) ){ - warning("Not all rows were present in the rowTree specified by 'tree_name'.", + warning("Not all rows were present in the rowTree specified by 'tree.name'.", "'x' is subsetted.", call. = FALSE) # Subset the data x <- x[ whichTree, ] diff --git a/R/runNMDS.R b/R/runNMDS.R index f458a30ec..f9497f6f9 100644 --- a/R/runNMDS.R +++ b/R/runNMDS.R @@ -15,15 +15,19 @@ #' @param ntop Numeric scalar specifying the number of features with the highest #' variances to use for dimensionality reduction. #' -#' @param subset_row Vector specifying the subset of features to use for +#' @param subset.row Vector specifying the subset of features to use for #' dimensionality reduction. This can be a character vector of row names, an #' integer vector of row indices or a logical vector. +#' +#' @param subset_row Deprecated. 
Use \code{subset.row} instead. #' #' @param scale Logical scalar, should the expression values be standardized? #' -#' @param keep_dist Logical scalar indicating whether the \code{dist} object +#' @param keep.dist Logical scalar indicating whether the \code{dist} object #' calculated by \code{FUN} should be stored as \sQuote{dist} attribute of #' the matrix returned/stored by \code{getNMDS}/ \code{addNMDS}. +#' +#' @param keep_dist Deprecated. Use \code{keep.dist} instead. #' #' @param transposed Logical scalar, is x transposed with cells in rows? #' @@ -42,19 +46,23 @@ #' @param FUN a \code{function} or \code{character} value with a function #' name returning a \code{\link[stats:dist]{dist}} object #' -#' @param nmdsFUN a \code{character} value to choose the scaling +#' @param nmds.fun a \code{character} value to choose the scaling #' implementation, either \dQuote{isoMDS} for #' \code{\link[MASS:isoMDS]{MASS::isoMDS}} or \dQuote{monoMDS} for #' \code{\link[vegan:monoMDS]{vegan::monoMDS}} +#' +#' @param nmdsFUN Deprecated. Use \code{nmds.fun} instead. #' #' @param ... additional arguments to pass to \code{FUN} and -#' \code{nmdsFUN}. +#' \code{nmds.fun}. #' #' @param dimred String or integer scalar specifying the existing dimensionality #' reduction results to use. #' -#' @param n_dimred Integer scalar or vector specifying the dimensions to use if +#' @param ndimred Integer scalar or vector specifying the dimensions to use if #' dimred is specified. +#' +#' @param n_dimred Deprecated. Use \code{ndimred} instead. #' #' @param altexp String or integer scalar specifying an alternative experiment #' containing the input data. @@ -69,7 +77,7 @@ #' Either \code{\link[MASS:isoMDS]{MASS::isoMDS}} or #' \code{\link[vegan:monoMDS]{vegan::monoMDS}} are used internally to compute #' the NMDS components. If you supply a custom \code{FUN}, make sure that -#' the arguments of \code{FUN} and \code{nmdsFUN} do not collide. 
+#' the arguments of \code{FUN} and \code{nmds.fun} do not collide. #' #' @name runNMDS #' @@ -121,8 +129,8 @@ setGeneric("getNMDS", function(x, ...) standardGeneric("getNMDS")) ans } -.format_nmds <- function(nmds, nmdsFUN, sample_names){ - ans <- switch(nmdsFUN, +.format_nmds <- function(nmds, nmds.fun, sample_names){ + ans <- switch(nmds.fun, "isoMDS" = .format_nmds_isoMDS(nmds), "monoMDS" = .format_nmds_monoMDS(nmds)) rownames(ans) <- sample_names @@ -138,9 +146,9 @@ setGeneric("getNMDS", function(x, ...) standardGeneric("getNMDS")) "smin","sfgrmin","sratmax")] } -.get_nmds_args <- function(nmdsFUN, ...){ +.get_nmds_args <- function(nmds.fun, ...){ args <- list(...) - args <- switch(nmdsFUN, + args <- switch(nmds.fun, "isoMDS" = .get_nmds_args_isoMDS(args), "monoMDS" = .get_nmds_args_monoMDS(args)) args <- args[!vapply(args,is.null,logical(1))] @@ -150,15 +158,17 @@ setGeneric("getNMDS", function(x, ...) standardGeneric("getNMDS")) #' @importFrom MASS isoMDS #' @importFrom stats cmdscale #' @importFrom vegan vegdist monoMDS -.calculate_nmds <- function(x, FUN = vegdist, +.calculate_nmds <- function(x, FUN = vegdist, + nmds.fun = nmdsFUN, nmdsFUN = c("isoMDS","monoMDS"), - ncomponents = 2, ntop = 500, subset_row = NULL, - scale = FALSE, transposed = FALSE, + ncomponents = 2, ntop = 500, subset.row = subset_row, + subset_row = NULL, scale = FALSE, transposed = FALSE, + keep.dist = keep_dist, keep_dist = FALSE, ...){ - nmdsFUN <- match.arg(nmdsFUN) - nmdsArgs <- .get_nmds_args(nmdsFUN, ...) + nmds.fun <- match.arg(nmds.fun) + nmdsArgs <- .get_nmds_args(nmds.fun, ...) if(!transposed) { - x <- .get_mat_for_reddim(x, subset_row = subset_row, ntop = ntop, + x <- .get_mat_for_reddim(x, subset_row = subset.row, ntop = ntop, scale = scale) } x <- as.matrix(x) @@ -168,11 +178,11 @@ setGeneric("getNMDS", function(x, ...) 
standardGeneric("getNMDS")) list(...))) attributes(sample_dist) <- attributes(sample_dist)[c("class","Size")] y <- cmdscale(sample_dist, k = ncomponents) - ans <- do.call(nmdsFUN, + ans <- do.call(nmds.fun, c(list(sample_dist, y = y, k = ncomponents), nmdsArgs)) - ans <- .format_nmds(ans, nmdsFUN, sample_names) - if (keep_dist) { + ans <- .format_nmds(ans, nmds.fun, sample_names) + if (keep.dist) { attr(ans,"dist") <- sample_dist } ans @@ -196,10 +206,10 @@ setMethod("getNMDS", "SummarizedExperiment", #' @export setMethod("getNMDS", "SingleCellExperiment", function(x, ..., assay.type = assay_name, assay_name = exprs_values, - exprs_values = "counts", dimred = NULL, n_dimred = NULL, - FUN = vegdist){ + exprs_values = "counts", dimred = NULL, ndimred = n_dimred, + n_dimred = NULL, FUN = vegdist){ mat <- .get_mat_from_sce(x, exprs_values = assay.type, - dimred = dimred, n_dimred = n_dimred) + dimred = dimred, n_dimred = ndimred) getNMDS(mat, transposed = !is.null(dimred), FUN = FUN,...) } ) diff --git a/R/splitByRanks.R b/R/splitByRanks.R index 98399bbcb..3cf99505a 100644 --- a/R/splitByRanks.R +++ b/R/splitByRanks.R @@ -19,10 +19,12 @@ #' will be dropped. This setting can be tweaked by defining #' \code{empty.fields} to your needs. (default: \code{na.rm = TRUE}) #' -#' @param keep_reducedDims \code{TRUE} or \code{FALSE}: Should the +#' @param keep.dimred \code{TRUE} or \code{FALSE}: Should the #' \code{reducedDims(x)} be transferred to the result? Please note, that this #' breaks the link between the data used to calculate the reduced dims. -#' (default: \code{keep_reducedDims = FALSE}) +#' (default: \code{keep.dimred = FALSE}) +#' +#' @param keep_reducedDims Deprecated. Use \code{keep.dimred} instead. 
#' #' @param as.list \code{TRUE} or \code{FALSE}: Should the list of #' \code{SummarizedExperiment} objects be returned by the function @@ -143,7 +145,7 @@ setMethod("agglomerateByRanks", signature = c(x = "SummarizedExperiment"), setMethod("agglomerateByRanks", signature = c(x = "SingleCellExperiment"), function(x, ranks = taxonomyRanks(x), na.rm = TRUE, as.list = FALSE, ...){ args <- .norm_args_for_split_by_ranks(na.rm = na.rm, ...) - args[["strip_altexp"]] <- TRUE + args[["altexp.rm"]] <- TRUE callNextMethod() } ) @@ -182,12 +184,12 @@ setGeneric("unsplitByRanks", #' @importFrom SingleCellExperiment reducedDims #' @importFrom SummarizedExperiment colData -.unsplit_by <- function(x, ses, keep_reducedDims, ...){ +.unsplit_by <- function(x, ses, keep.dimred, ...){ class_x <- class(x) # args <- list(assays = .unsplit_assays(ses), colData = colData(x)) - if(keep_reducedDims){ + if(keep.dimred){ args$reducedDims <- reducedDims(x) } rd <- .combine_rowData(ses) @@ -199,7 +201,7 @@ setGeneric("unsplitByRanks", } #' @importFrom SingleCellExperiment altExpNames altExp altExps -.unsplit_by_ranks <- function(x, ranks, keep_reducedDims, ...){ +.unsplit_by_ranks <- function(x, ranks, keep.dimred, ...){ ae_names <- altExpNames(x) ae_names <- ae_names[ae_names %in% ranks] if(length(ae_names) == 0L){ @@ -211,21 +213,22 @@ setGeneric("unsplitByRanks", ses[[i]] <- .remove_with_empty_taxonomic_info(ses[[i]], names(ses)[i], NA) } - ans <- .unsplit_by(x, ses, keep_reducedDims, ...) - rownames(ans) <- getTaxonomyLabels(ans, make_unique = FALSE) + ans <- .unsplit_by(x, ses, keep.dimred, ...) 
+ rownames(ans) <- getTaxonomyLabels(ans, make.unique = FALSE) ans } #' @rdname agglomerate-methods #' @export setMethod("unsplitByRanks", signature = c(x = "SingleCellExperiment"), - function(x, ranks = taxonomyRanks(x), keep_reducedDims = FALSE, ...){ + function(x, ranks = taxonomyRanks(x), keep.dimred = keep_reducedDims, + keep_reducedDims = FALSE, ...){ # input check - if(!.is_a_bool(keep_reducedDims)){ - stop("'keep_reducedDims' must be TRUE or FALSE.", call. = FALSE) + if(!.is_a_bool(keep.dimred)){ + stop("'keep.dimred' must be TRUE or FALSE.", call. = FALSE) } # - .unsplit_by_ranks(x, ranks = ranks, keep_reducedDims = keep_reducedDims, + .unsplit_by_ranks(x, ranks = ranks, keep.dimred = keep.dimred, ...) } ) @@ -233,7 +236,8 @@ setMethod("unsplitByRanks", signature = c(x = "SingleCellExperiment"), #' @rdname agglomerate-methods #' @export setMethod("unsplitByRanks", signature = c(x = "TreeSummarizedExperiment"), - function(x, ranks = taxonomyRanks(x), keep_reducedDims = FALSE, ...){ + function(x, ranks = taxonomyRanks(x), keep.dimred = keep_reducedDims, + keep_reducedDims = FALSE, ...){ callNextMethod() } ) diff --git a/R/splitOn.R b/R/splitOn.R index 8b9a076b4..92e808679 100644 --- a/R/splitOn.R +++ b/R/splitOn.R @@ -13,25 +13,31 @@ #' Split by cols is not encouraged, since this is not compatible with #' storing the results in \code{altExps}. #' -#' @param keep_reducedDims \code{TRUE} or \code{FALSE}: Should the +#' @param keep.dimred \code{TRUE} or \code{FALSE}: Should the #' \code{reducedDims(x)} be transferred to the result? Please note, that this #' breaks the link between the data used to calculate the reduced dims. -#' (By default: \code{keep_reducedDims = FALSE}) +#' (By default: \code{keep.dimred = FALSE}) +#' +#' @param keep_reducedDims Deprecated. Use \code{keep.dimred} instead. 
#' -#' @param update_rowTree \code{TRUE} or \code{FALSE}: Should the rowTree be updated +#' @param update.tree \code{TRUE} or \code{FALSE}: Should the rowTree be updated #' based on splitted data? Option is enabled when \code{x} is a #' \code{TreeSummarizedExperiment} object or a list of such objects. -#' (By default: \code{update_rowTree = FALSE}) +#' (By default: \code{update.tree = FALSE}) +#' +#' @param update_rowTree Deprecated. Use \code{update.tree} instead. #' -#' @param altExpNames a \code{character} vector specifying the alternative experiments -#' to be unsplit. (By default: \code{altExpNames = names(altExps(x))}) +#' @param altexp a \code{character} vector specifying the alternative experiments +#' to be unsplit. (By default: \code{altexp = names(altExps(x))}) +#' +#' @param altExpNames Deprecated. Use \code{altexp} instead. #' #' @param ... Arguments passed to \code{agglomerateByVariable} function for #' \code{SummarizedExperiment} objects and other functions. #' See \code{\link[=agglomerate-methods]{agglomerateByVariable}} for more #' details. #' \itemize{ -#' \item{\code{use_names} A single boolean value to select whether to name elements of +#' \item{\code{use.names} A single boolean value to select whether to name elements of #' list by their group names.} #' } #' @@ -79,7 +85,7 @@ #' # Each element is named based on their group name. If you don't want to name #' # elements, use use_name = FALSE. Since "group" can be found from rowdata and colData #' # you must use MARGIN. 
-#' se_list <- splitOn(tse, f = "group", use_names = FALSE, MARGIN = 1) +#' se_list <- splitOn(tse, f = "group", use.names = FALSE, MARGIN = 1) #' #' # When column names are shared between elements, you can store the list to altExps #' altExps(tse) <- se_list @@ -87,7 +93,7 @@ #' altExps(tse) #' #' # If you want to split on columns and update rowTree, you can do -#' se_list <- splitOn(tse, f = colData(tse)$group, update_rowTree = TRUE) +#' se_list <- splitOn(tse, f = colData(tse)$group, update.tree = TRUE) #' #' # If you want to combine groups back together, you can use unsplitBy #' unsplitOn(se_list) @@ -102,8 +108,9 @@ setGeneric("splitOn", standardGeneric("splitOn")) # This function collects f (grouping variable), MARGIN, and -# use_names and returns them as a list. -.norm_args_for_split_by <- function(x, f, MARGIN = NULL, use_names = TRUE, ...){ +# use.names and returns them as a list. +.norm_args_for_split_by <- function(x, f, MARGIN = NULL, use.names = use_names, + use_names = TRUE, ...){ # input check # Check f if(is.null(f)){ @@ -204,15 +211,15 @@ setGeneric("splitOn", f <- addNA(f) } } - # Check use_names - if( !.is_a_bool(use_names) ){ - stop("'use_names' must be TRUE or FALSE.", + # Check use.names + if( !.is_a_bool(use.names) ){ + stop("'use.names' must be TRUE or FALSE.", call. = FALSE) } # Create a list from arguments list(f = f, MARGIN = MARGIN, - use_names = use_names) + use.names = use.names) } # PErform the split @@ -239,7 +246,7 @@ setGeneric("splitOn", ans <- SimpleList(lapply(idxs, subset_FUN, x = x, i = TRUE)) } # If user do not want to use names, unname - if(!args[["use_names"]]){ + if(!args[["use.names"]]){ ans <- unname(ans) # Otherwise convert NAs to "NA", if there is a level that do not have name } else{ @@ -266,7 +273,7 @@ setMethod("splitOn", signature = c(x = "SingleCellExperiment"), # Get arguments args <- .norm_args_for_split_by(x, f = f, ...) # Should alternative experiment be removed? 
--> yes - args[["strip_altexp"]] <- TRUE + args[["altexp.rm"]] <- TRUE # Split data .split_on(x, args, ...) } @@ -275,19 +282,19 @@ setMethod("splitOn", signature = c(x = "SingleCellExperiment"), #' @rdname splitOn #' @export setMethod("splitOn", signature = c(x = "TreeSummarizedExperiment"), - function(x, f = NULL, update_rowTree = FALSE, + function(x, f = NULL, update.tree = update_rowTree, update_rowTree = FALSE, ...){ # Input check - # Check update_rowTree - if( !.is_a_bool(update_rowTree) ){ - stop("'update_rowTree' must be TRUE or FALSE.", + # Check update.tree + if( !.is_a_bool(update.tree) ){ + stop("'update.tree' must be TRUE or FALSE.", call. = FALSE) } # Input check end # Split data x <- callNextMethod() # Manipulate rowTree or not? - if( update_rowTree ){ + if( update.tree ){ # If the returned value is a list, go through all of them if( is(x, 'SimpleList') ){ x <- SimpleList(lapply(x, .agglomerate_trees)) @@ -312,7 +319,7 @@ setGeneric("unsplitOn", standardGeneric("unsplitOn")) # Perform the unsplit -.list_unsplit_on <- function(ses, update_rowTree = FALSE, MARGIN = NULL, ...){ +.list_unsplit_on <- function(ses, update.tree = FALSE, MARGIN = NULL, ...){ # Input check is_check <- vapply(ses,is,logical(1L),"SummarizedExperiment") if(!all(is_check)){ @@ -320,9 +327,9 @@ setGeneric("unsplitOn", "only.", call. = FALSE) } - # Check update_rowTree - if( !.is_a_bool(update_rowTree) ){ - stop("'update_rowTree' must be TRUE or FALSE.", + # Check update.tree + if( !.is_a_bool(update.tree) ){ + stop("'update.tree' must be TRUE or FALSE.", call. = FALSE) } if( !(is.null(MARGIN) || (is.numeric(MARGIN) && (MARGIN == 1 || MARGIN == 2 ))) ){ @@ -386,7 +393,7 @@ setGeneric("unsplitOn", # IF the object is TreeSE. 
add rowTree if( class_x == "TreeSummarizedExperiment" ){ # Update or add old tree from the first element of list - if( update_rowTree ){ + if( update.tree ){ ans <- addHierarchyTree(ans) } else{ rowTree(ans) <- rowTree(ses[[1L]]) @@ -412,17 +419,17 @@ setGeneric("unsplitOn", #' @importFrom SingleCellExperiment altExpNames altExp altExps #' @export setMethod("unsplitOn", signature = c(x = "list"), - function(x, update_rowTree = FALSE, ...){ + function(x, update.tree = update_rowTree, update_rowTree = FALSE, ...){ # Unsplit list and create SCE, SE, or TreeSE from it - .list_unsplit_on(x, update_rowTree, ...) + .list_unsplit_on(x, update.tree, ...) } ) #' @rdname splitOn #' @importFrom SingleCellExperiment altExpNames altExp altExps #' @export setMethod("unsplitOn", signature = c(x = "SimpleList"), - function(x, update_rowTree = FALSE, ...){ - unsplitOn(as.list(x), update_rowTree, ...) + function(x, update.tree = update_rowTree, update_rowTree = FALSE, ...){ + unsplitOn(as.list(x), update.tree, ...) } ) @@ -430,24 +437,26 @@ setMethod("unsplitOn", signature = c(x = "SimpleList"), #' @importFrom SingleCellExperiment altExpNames altExp altExps reducedDims<- #' @export setMethod("unsplitOn", signature = c(x = "SingleCellExperiment"), - function(x, altExpNames = names(altExps(x)), keep_reducedDims = FALSE, ...){ + function(x, altexp = altExpNames, altExpNames = names(altExps(x)), + keep.dimred = keep_reducedDims, + keep_reducedDims = FALSE, ...){ # input check - if(!.is_a_bool(keep_reducedDims)){ - stop("'keep_reducedDims' must be TRUE or FALSE.", call. = FALSE) + if(!.is_a_bool(keep.dimred)){ + stop("'keep.dimred' must be TRUE or FALSE.", call. 
= FALSE) } # Get alternative experiment names since data is located there ae_names <- names(altExps(x)) # Get only those experiments that user has specified - ae_names <- ae_names[ae_names %in% altExpNames] + ae_names <- ae_names[ae_names %in% altexp] if(length(ae_names) == 0L){ - stop("No altExp matching 'altExpNames' in name.", call. = FALSE) + stop("No altExp matching 'altexp' in name.", call. = FALSE) } # Get alternative experiments as a list ses <- altExps(x)[ae_names] # And unsplit the data se <- .list_unsplit_on(ses, ...) # Add reducedDims if specified - if( keep_reducedDims ){ + if( keep.dimred ){ reducedDims(se) <- reducedDims(x) } return(se) diff --git a/R/summaries.R b/R/summaries.R index 32899433b..9f98a752c 100644 --- a/R/summaries.R +++ b/R/summaries.R @@ -138,7 +138,7 @@ setMethod("getTop", signature = c(x = "SummarizedExperiment"), # if(method == "prevalence"){ taxs <- getPrevalence(assay(x, assay.type), sort = TRUE, - include_lowest = TRUE, ...) + include.lowest = TRUE, ...) # If there are taxa with prevalence of 0, remove them taxs <- taxs[ taxs > 0 ] } else { diff --git a/R/taxonomy.R b/R/taxonomy.R index be70abe38..6a7af400c 100644 --- a/R/taxonomy.R +++ b/R/taxonomy.R @@ -40,16 +40,22 @@ #' regarded as empty. (Default: \code{c(NA, "", " ", "\t")}). They will be #' removed if \code{na.rm = TRUE} before agglomeration #' -#' @param with_rank \code{TRUE} or \code{FALSE}: Should the level be add as a +#' @param with.rank \code{TRUE} or \code{FALSE}: Should the level be add as a #' suffix? For example: "Phylum:Crenarchaeota" (default: -#' \code{with_rank = FALSE}) +#' \code{with.rank = FALSE}) +#' +#' @param with_rank Deprecated. Use \code{with.rank} instead. #' -#' @param make_unique \code{TRUE} or \code{FALSE}: Should the labels be made -#' unique, if there are any duplicates? (default: \code{make_unique = TRUE}) +#' @param make.unique \code{TRUE} or \code{FALSE}: Should the labels be made +#' unique, if there are any duplicates? 
(default: \code{make.unique = TRUE}) +#' +#' @param make_unique Deprecated. Use \code{make.unique} instead. #' -#' @param resolve_loops \code{TRUE} or \code{FALSE}: Should \code{resolveLoops} +#' @param resolve.loops \code{TRUE} or \code{FALSE}: Should \code{resolveLoops} #' be applied to the taxonomic data? Please note that has only an effect, -#' if the data is unique. (default: \code{resolve_loops = TRUE}) +#' if the data is unique. (default: \code{resolve.loops = TRUE}) +#' +#' @param resolve_loops Deprecated. Use \code{resolve.loops} instead. #' #' @param taxa a \code{character} vector, which is used for subsetting the #' taxonomic information. If no information is found,\code{NULL} is returned @@ -66,9 +72,11 @@ #' @param to a scalar \code{character} value, which must be a valid #' taxonomic rank. (default: \code{NULL}) #' -#' @param use_grepl \code{TRUE} or \code{FALSE}: should pattern matching via +#' @param use.grepl \code{TRUE} or \code{FALSE}: should pattern matching via #' \code{grepl} be used? Otherwise literal matching is used. #' (default: \code{FALSE}) +#' +#' @param use_grepl Deprecated. Use \code{use.grepl} instead. #' #' @param ... optional arguments not used currently. #' @@ -284,8 +292,9 @@ setGeneric("getTaxonomyLabels", #' @aliases checkTaxonomy #' @export setMethod("getTaxonomyLabels", signature = c(x = "SummarizedExperiment"), - function(x, empty.fields = c(NA, "", " ", "\t", "-", "_"), - with_rank = FALSE, make_unique = TRUE, resolve_loops = FALSE, ...){ + function(x, empty.fields = c(NA, "", " ", "\t", "-", "_"), with.rank = with_rank, + with_rank = FALSE, make.unique = make_unique, make_unique = TRUE, + resolve.loops = resolve_loops, resolve_loops = FALSE, ...){ # input check if(nrow(x) == 0L){ stop("No data available in `x` ('x' has nrow(x) == 0L.)", @@ -299,14 +308,14 @@ setMethod("getTaxonomyLabels", signature = c(x = "SummarizedExperiment"), stop("'empty.fields' must be a character vector with one or ", "more values.", call. 
= FALSE) } - if(!.is_a_bool(with_rank)){ - stop("'with_rank' must be TRUE or FALSE.", call. = FALSE) + if(!.is_a_bool(with.rank)){ + stop("'with.rank' must be TRUE or FALSE.", call. = FALSE) } - if(!.is_a_bool(make_unique)){ - stop("'make_unique' must be TRUE or FALSE.", call. = FALSE) + if(!.is_a_bool(make.unique)){ + stop("'make.unique' must be TRUE or FALSE.", call. = FALSE) } - if(!.is_a_bool(resolve_loops)){ - stop("'resolve_loops' must be TRUE or FALSE.", call. = FALSE) + if(!.is_a_bool(resolve.loops)){ + stop("'resolve.loops' must be TRUE or FALSE.", call. = FALSE) } # dup <- duplicated(rowData(x)[,taxonomyRanks(x)]) @@ -317,14 +326,14 @@ setMethod("getTaxonomyLabels", signature = c(x = "SummarizedExperiment"), } ans <- .get_taxonomic_label(x[!dup,], empty.fields = empty.fields, - with_rank = with_rank, - resolve_loops = resolve_loops) + with.rank = with.rank, + resolve.loops = resolve.loops) if(any(dup)){ ans <- ans[m] } # last resort - this happens, if annotation data contains ambiguous data # sometimes labeled as "circles" - if(make_unique && anyDuplicated(ans)){ + if(make.unique && anyDuplicated(ans)){ dup <- which(ans %in% ans[which(duplicated(ans))]) ans[dup] <- make.unique(ans[dup], sep = "_") } @@ -378,8 +387,8 @@ setMethod("getTaxonomyLabels", signature = c(x = "SummarizedExperiment"), .get_taxonomic_label <- function(x, empty.fields = c(NA, "", " ", "\t", "-", "_"), - with_rank = FALSE, - resolve_loops = FALSE){ + with.rank = FALSE, + resolve.loops = FALSE){ rd <- rowData(x) tax_cols <- .get_tax_cols_from_se(x) tax_ranks_selected <- .get_tax_ranks_selected(x, rd, tax_cols, empty.fields) @@ -388,7 +397,7 @@ setMethod("getTaxonomyLabels", signature = c(x = "SummarizedExperiment"), } tax_cols_selected <- tax_cols[tax_ranks_selected] # resolve loops - if(resolve_loops){ + if(resolve.loops){ td <- as.data.frame(rd[,tax_cols]) td <- suppressWarnings(resolveLoop(td)) rd[,tax_cols] <- as(td,"DataFrame") @@ -401,7 +410,7 @@ setMethod("getTaxonomyLabels", 
signature = c(x = "SummarizedExperiment"), tax_cols_selected, SIMPLIFY = FALSE) ans <- unlist(ans, use.names = FALSE) - if(with_rank || !all_same_rank){ + if(with.rank || !all_same_rank){ ans <- .add_taxonomic_type(rd, ans, tax_cols_selected) } ans @@ -540,7 +549,7 @@ setMethod("addHierarchyTree", signature = c(x = "SummarizedExperiment"), x <- as(x,"TreeSummarizedExperiment") # Get node labs: which row represents which node in the tree? node_labs <- getTaxonomyLabels( - x, with_rank = TRUE, resolve_loops = TRUE, make_unique = FALSE) + x, with.rank = TRUE, resolve.loops = TRUE, make.unique = FALSE) # Add tree x <- changeTree(x, tree, node_labs) return(x) @@ -554,11 +563,11 @@ setGeneric("mapTaxonomy", standardGeneric("mapTaxonomy")) #' @importFrom BiocGenerics %in% grepl -.get_taxa_row_match <- function(taxa, td, from, use_grepl = FALSE){ +.get_taxa_row_match <- function(taxa, td, from, use.grepl = FALSE){ if(is.na(taxa)){ r_f <- is.na(td[[from]]) } else { - if(use_grepl){ + if(use.grepl){ r_f <- grepl(taxa, td[[from]]) } else { r_f <- td[[from]] %in% taxa @@ -569,11 +578,11 @@ setGeneric("mapTaxonomy", } #' @importFrom BiocGenerics %in% grepl -.get_taxa_any_match <- function(taxa, td, use_grepl = FALSE){ +.get_taxa_any_match <- function(taxa, td, use.grepl = FALSE){ if(is.na(taxa)){ r_f <- is.na(td) } else { - if(use_grepl){ + if(use.grepl){ r_f <- vapply(td,grepl,logical(nrow(td)),pattern=taxa) } else { r_f <- t(as.matrix(td %in% taxa)) @@ -588,7 +597,8 @@ setGeneric("mapTaxonomy", #' @importFrom BiocGenerics %in% #' @export setMethod("mapTaxonomy", signature = c(x = "SummarizedExperiment"), - function(x, taxa = NULL, from = NULL, to = NULL, use_grepl = FALSE){ + function(x, taxa = NULL, from = NULL, to = NULL, use.grepl = use_grepl, + use_grepl = FALSE){ # input check if(!checkTaxonomy(x)){ stop("Non compatible taxonomic information found. 
", @@ -626,8 +636,8 @@ setMethod("mapTaxonomy", signature = c(x = "SummarizedExperiment"), stop("'from' and 'to' must be different values.", call. = FALSE) } } - if(!.is_a_bool(use_grepl)){ - stop("'use_grepl' must be TRUE or FALSE.", call. = FALSE) + if(!.is_a_bool(use.grepl)){ + stop("'use.grepl' must be TRUE or FALSE.", call. = FALSE) } # td <- rowData(x)[,taxonomyRanks(x)] @@ -639,11 +649,11 @@ setMethod("mapTaxonomy", signature = c(x = "SummarizedExperiment"), c_f <- rep(TRUE,ncol(td)) if(!is.null(from)){ r_fs <- lapply(taxa, .get_taxa_row_match, td = td, from = from, - use_grepl = use_grepl) + use.grepl = use.grepl) names(r_fs) <- taxa } else { r_fs <- lapply(taxa, .get_taxa_any_match, td = td, - use_grepl = use_grepl) + use.grepl = use.grepl) names(r_fs) <- taxa } if(!is.null(to)) { @@ -688,10 +698,11 @@ setMethod("mapTaxonomy", signature = c(x = "SummarizedExperiment"), } #' @importFrom SummarizedExperiment rowData -.get_tax_groups <- function(x, col, onRankOnly = FALSE, ...){ +.get_tax_groups <- function(x, col, ignore.taxonomy = onRankOnly, + onRankOnly = FALSE, ...){ # input check - if(!.is_a_bool(onRankOnly)){ - stop("'onRankOnly' must be TRUE or FALSE.", call. = FALSE) + if(!.is_a_bool(ignore.taxonomy)){ + stop("'ignore.taxonomy' must be TRUE or FALSE.", call. = FALSE) } tax_cols <- .get_tax_cols_from_se(x) @@ -699,7 +710,7 @@ setMethod("mapTaxonomy", signature = c(x = "SummarizedExperiment"), if(length(tax_col_n) < col){ stop(".") } - if(onRankOnly){ + if(ignore.taxonomy){ groups <- rowData(x)[,tax_cols[tax_col_n == col],drop=TRUE] } else { groups <- rowData(x)[,tax_cols[tax_col_n <= col],drop=FALSE] diff --git a/R/transformCounts.R b/R/transformCounts.R index e4092f21a..294294b55 100644 --- a/R/transformCounts.R +++ b/R/transformCounts.R @@ -33,9 +33,10 @@ #' #' @param ... 
additional arguments passed on to \code{vegan:decostand}: #' \itemize{ -#' \item \code{ref_vals}: A single value which will be used to fill +#' \item \code{reference}: A single value which will be used to fill #' reference sample's column in returned assay when calculating alr. -#' (default: \code{ref_vals = NA}) +#' (default: \code{reference = NA}) +#' \item \code{ref_vals} Deprecated. Use \code{reference} instead. #' } #' @details #' @@ -296,11 +297,12 @@ setMethod("transformAssay", signature = c(x = "SummarizedExperiment"), # Help function for transformAssay, takes abundance # table as input and returns transformed table. This function utilizes vegan's # transformation functions. -.apply_transformation_from_vegan <- function(mat, method, MARGIN, ref_vals = NA, ...){ +.apply_transformation_from_vegan <- function(mat, method, MARGIN, reference = ref_vals, + ref_vals = NA, ...){ # Input check - # Check ref_vals - if( length(ref_vals) != 1 ){ - stop("'ref_vals' must be a single value specifying the ", + # Check reference + if( length(reference) != 1 ){ + stop("'reference' must be a single value specifying the ", "values of the reference sample.", call. = FALSE) } @@ -324,7 +326,7 @@ setMethod("transformAssay", signature = c(x = "SummarizedExperiment"), if( method %in% c("alr") ){ transformed_table <- .adjust_alr_table( mat = transformed_table, orig_dimnames = orig_dimnames, - ref_vals = ref_vals) + reference = reference) } # If table is transposed (like in chi.square), transpose back if(identical(rownames(transformed_table), colnames(mat)) && @@ -373,7 +375,7 @@ setMethod("transformAssay", signature = c(x = "SummarizedExperiment"), # vegan::decostand returns ALR transformed abundance table without reference # sample. Because in TreeSE all assays must have same row and column names, # the reference sample is assigned back to transformed abundance table. 
-.adjust_alr_table <- function(mat, orig_dimnames, ref_vals){ +.adjust_alr_table <- function(mat, orig_dimnames, reference){ # Store attributes attributes <- attributes(mat) # Get original and current sample/feature names and dimensions of reference @@ -392,7 +394,7 @@ setMethod("transformAssay", signature = c(x = "SummarizedExperiment"), ref_dimnames <- list(var_names, reference_name) } # Reference sample as NAs or with symbols that are specified by user - reference_sample <- matrix(ref_vals, nrow = nrow, ncol = ncol, + reference_sample <- matrix(reference, nrow = nrow, ncol = ncol, dimnames = ref_dimnames) # Add reference sample/feature if(MARGIN == 1){ diff --git a/R/utils.R b/R/utils.R index a507bf4ac..7be9bfa5f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -80,25 +80,25 @@ } } -.check_rowTree_present <- function(tree_name, x, - name = .get_name_in_parent(tree_name) ){ - if( !.is_non_empty_string(tree_name) ){ +.check_rowTree_present <- function(tree.name, x, + name = .get_name_in_parent(tree.name) ){ + if( !.is_non_empty_string(tree.name) ){ stop("'", name, "' must be a single non-empty character value.", call. = FALSE) } - if( !(tree_name %in% names(x@rowTree)) ){ + if( !(tree.name %in% names(x@rowTree)) ){ stop("'", name, "' must specify a tree from 'x@rowTree'.", call. = FALSE) } } -.check_colTree_present <- function(tree_name, x, - name = .get_name_in_parent(tree_name) ){ - if( !.is_non_empty_string(tree_name) ){ +.check_colTree_present <- function(tree.name, x, + name = .get_name_in_parent(tree.name) ){ + if( !.is_non_empty_string(tree.name) ){ stop("'", name, "' must be a single non-empty character value.", call. = FALSE) } - if( !(tree_name %in% names(x@colTree)) ){ + if( !(tree.name %in% names(x@colTree)) ){ stop("'", name, "' must specify a tree from 'x@colTree'.", call. = FALSE) } @@ -397,11 +397,11 @@ #' between different taxonomic levels, defaults to one compatible with both #' GreenGenes and SILVA `; |;"`. 
#' -#' @param column_name a single \code{character} value defining the column of taxa_tab +#' @param col.name a single \code{character} value defining the column of taxa_tab #' that includes taxonomical information. #' -#' @param remove.prefix {\code{TRUE} or \code{FALSE}: Should -#' taxonomic prefixes be removed? (default: \code{remove.prefix = FALSE})} +#' @param prefix.rm {\code{TRUE} or \code{FALSE}: Should +#' taxonomic prefixes be removed? (default: \code{prefix.rm = FALSE})} #' #' @return a `data.frame`. #' @keywords internal @@ -409,8 +409,8 @@ #' @importFrom S4Vectors DataFrame #' @noRd .parse_taxonomy <- function( - taxa_tab, sep = "; |;", column_name = "Taxon", - remove.prefix = removeTaxaPrefixes, removeTaxaPrefixes = FALSE, + taxa_tab, sep = "; |;", col.name = column_name, column_name = "Taxon", + remove.prefix = prefix.rm, prefix.rm = removeTaxaPrefixes, removeTaxaPrefixes = FALSE, returned.ranks = TAXONOMY_RANKS, ...) { ############################### Input check ################################ # Check sep @@ -418,9 +418,9 @@ stop("'sep' must be a single character value.", call. = FALSE) } - # Check column_name - if( !(.is_non_empty_string(column_name) && column_name %in% colnames(taxa_tab)) ){ - stop("'column_name' must be a single character value defining column that includes", + # Check col.name + if( !(.is_non_empty_string(col.name) && col.name %in% colnames(taxa_tab)) ){ + stop("'col.name' must be a single character value defining column that includes", " information about taxonomic levels.", call. 
= FALSE) } @@ -440,7 +440,7 @@ all_prefixes <- c("k__", "p__", "c__", "o__", "f__", "g__", "s__", "t__") # split the taxa strings - taxa_split <- CharacterList(strsplit(taxa_tab[, column_name],sep)) + taxa_split <- CharacterList(strsplit(taxa_tab[, col.name],sep)) # extract present prefixes taxa_prefixes <- lapply(taxa_split, substr, 1L, 3L) # match them to the order given by present_prefixes diff --git a/inst/scripts/import.R b/inst/scripts/import.R index cc46c3bdb..fd6a5767f 100644 --- a/inst/scripts/import.R +++ b/inst/scripts/import.R @@ -7,7 +7,7 @@ sample_meta_file_path <- "PATH_TO_SAMPLE_METADATA_FILE" tree_file_path <- "PATH_TO_PHYLOGENETIC_TREE_FILE" # Load data from the biom file into a TreeSummarizedExperiment container -tse <- importBIOM(biom_file_path, removeTaxaPrefixes = TRUE, rankFromPrefix = TRUE) +tse <- importBIOM(biom_file_path, prefix.rm = TRUE, rank.from.prefix = TRUE) # Read sample metadata from file and add column names if necessary sample_meta <- diff --git a/man/agglomerate-methods.Rd b/man/agglomerate-methods.Rd index 5723ac926..553f4a68e 100644 --- a/man/agglomerate-methods.Rd +++ b/man/agglomerate-methods.Rd @@ -36,20 +36,40 @@ agglomerateByVariable(x, ...) \S4method{agglomerateByVariable}{SummarizedExperiment}(x, MARGIN, f, ...) -\S4method{agglomerateByVariable}{TreeSummarizedExperiment}(x, MARGIN, f, mergeTree = FALSE, ...) +\S4method{agglomerateByVariable}{TreeSummarizedExperiment}( + x, + MARGIN, + f, + update.tree = mergeTree, + mergeTree = FALSE, + ... +) -\S4method{agglomerateByRank}{SingleCellExperiment}(x, ..., altexp = NULL, strip_altexp = TRUE) +\S4method{agglomerateByRank}{SingleCellExperiment}( + x, + ..., + altexp = NULL, + altexp.rm = strip_altexp, + strip_altexp = TRUE +) \S4method{agglomerateByRank}{TreeSummarizedExperiment}( x, ..., + update.tree = agglomerateTree, agglomerate.tree = agglomerateTree, agglomerateTree = FALSE ) agglomerateByPrevalence(x, ...) 
-\S4method{agglomerateByPrevalence}{SummarizedExperiment}(x, rank = NULL, other_label = "Other", ...) +\S4method{agglomerateByPrevalence}{SummarizedExperiment}( + x, + rank = NULL, + other.label = other_label, + other_label = "Other", + ... +) agglomerateByRanks(x, ...) @@ -81,9 +101,21 @@ splitByRanks(x, ...) unsplitByRanks(x, ...) -\S4method{unsplitByRanks}{SingleCellExperiment}(x, ranks = taxonomyRanks(x), keep_reducedDims = FALSE, ...) +\S4method{unsplitByRanks}{SingleCellExperiment}( + x, + ranks = taxonomyRanks(x), + keep.dimred = keep_reducedDims, + keep_reducedDims = FALSE, + ... +) -\S4method{unsplitByRanks}{TreeSummarizedExperiment}(x, ranks = taxonomyRanks(x), keep_reducedDims = FALSE, ...) +\S4method{unsplitByRanks}{TreeSummarizedExperiment}( + x, + ranks = taxonomyRanks(x), + keep.dimred = keep_reducedDims, + keep_reducedDims = FALSE, + ... +) } \arguments{ \item{x}{a @@ -115,25 +147,29 @@ Must be \code{'rows'} or \code{'cols'}.} merged. If \code{length(levels(f)) == nrow(x)/ncol(x)}, \code{x} will be returned unchanged.} -\item{mergeTree}{\code{TRUE} or \code{FALSE}: Should -\code{rowTree()} also be merged? (Default: \code{mergeTree = FALSE})} +\item{update.tree}{\code{TRUE} or \code{FALSE}: Should +\code{rowTree()} also be merged? (Default: \code{update.tree = FALSE})} + +\item{mergeTree}{Deprecated. Use \code{update.tree} instead.} \item{altexp}{String or integer scalar specifying an alternative experiment containing the input data.} -\item{strip_altexp}{\code{TRUE} or \code{FALSE}: Should alternative +\item{altexp.rm}{\code{TRUE} or \code{FALSE}: Should alternative experiments be removed prior to agglomeration? This prevents to many nested alternative experiments by default (default: -\code{strip_altexp = TRUE})} +\code{altexp.rm = TRUE})} + +\item{strip_altexp}{Deprecated. Use \code{altexp.rm} instead.} -\item{agglomerate.tree}{\code{TRUE} or \code{FALSE}: should -\code{rowTree()} also be agglomerated? 
(Default: -\code{agglomerate.tree = FALSE})} +\item{agglomerate.tree}{Deprecated. Use \code{update.tree} instead.} -\item{agglomerateTree}{alias for \code{agglomerate.tree}.} +\item{agglomerateTree}{Deprecated. Use \code{update.tree} instead.} -\item{other_label}{A single \code{character} valued used as the label for the -summary of non-prevalent taxa. (default: \code{other_label = "Other"})} +\item{other.label}{A single \code{character} value used as the label for the +summary of non-prevalent taxa. (default: \code{other.label = "Other"})} + +\item{other_label}{Deprecated. Use \code{other.label} instead.} \item{ranks}{a character vector defining taxonomic ranks. Must all be values of \code{taxonomyRanks()} function.} @@ -143,10 +179,12 @@ of \code{taxonomyRanks()} function.} \code{agglomerateByRanks} as a SimpleList or stored in altExps? (default: \code{as.list = FALSE})} -\item{keep_reducedDims}{\code{TRUE} or \code{FALSE}: Should the +\item{keep.dimred}{\code{TRUE} or \code{FALSE}: Should the \code{reducedDims(x)} be transferred to the result? Please note, that this breaks the link between the data used to calculate the reduced dims. -(default: \code{keep_reducedDims = FALSE})} +(default: \code{keep.dimred = FALSE})} + +\item{keep_reducedDims}{Deprecated. Use \code{keep.dimred} instead.} } \value{ \code{agglomerateByRank} returns a taxonomically-agglomerated, @@ -215,7 +253,7 @@ level specified by \code{rank} (by default the highest taxonomic level available) and selects the summed results that exceed the given population prevalence at the given detection level. The other summed values (below the threshold) are agglomerated in an additional row taking the name indicated by -\code{other_label} (by default "Other"). +\code{other.label} (by default "Other"). \code{agglomerateByRanks} will use by default all available taxonomic ranks, but this can be controlled by setting \code{ranks} manually.
\code{NA} values @@ -250,7 +288,7 @@ nrow(x1) # agglomerate the tree as well x2 <- agglomerateByRank(GlobalPatterns, rank="Family", - agglomerate.tree = TRUE) + update.tree = TRUE) nrow(x2) # same number of rows, but rowTree(x1) # ... different rowTree(x2) # ... tree @@ -272,12 +310,12 @@ nrow(x3) # different from x2 print(rownames(x3[1:3,])) # To add them, use getTaxonomyLabels function. -rownames(x3) <- getTaxonomyLabels(x3, with_rank = TRUE) +rownames(x3) <- getTaxonomyLabels(x3, with.rank = TRUE) print(rownames(x3[1:3,])) -# use 'remove_empty_ranks' to remove columns that include only NAs +# use 'empty.ranks.rm' to remove columns that include only NAs x4 <- agglomerateByRank(GlobalPatterns, rank="Phylum", - remove_empty_ranks = TRUE) + empty.ranks.rm = TRUE) head(rowData(x4)) # If the assay contains NAs, you might want to consider replacing them, @@ -307,7 +345,7 @@ plot(rowTree(esophagus)) f <- factor(regmatches(rownames(esophagus), regexpr("^[0-9]*_[0-9]*",rownames(esophagus)))) merged <- agglomerateByVariable(esophagus, MARGIN = "rows", f, - mergeTree = TRUE) + update.tree = TRUE) plot(rowTree(merged)) # data(GlobalPatterns) @@ -321,7 +359,7 @@ tse <- agglomerateByPrevalence(tse, rank = "Phylum", detection = 1/100, prevalence = 50/100, - as_relative = TRUE) + as.relative = TRUE) tse diff --git a/man/calculateUnifrac.Rd b/man/calculateUnifrac.Rd index 0ff762973..6658d7d24 100644 --- a/man/calculateUnifrac.Rd +++ b/man/calculateUnifrac.Rd @@ -16,12 +16,20 @@ calculateUnifrac(x, tree, ...) assay.type = assay_name, assay_name = exprs_values, exprs_values = "counts", + tree.name = tree_name, tree_name = "phylo", transposed = FALSE, ... ) -runUnifrac(x, tree, weighted = FALSE, nodeLab = NULL, ...) +runUnifrac( + x, + tree, + weighted = FALSE, + node.label = nodeLab, + nodeLab = NULL, + ... +) } \arguments{ \item{x}{a numeric matrix or a @@ -58,18 +66,22 @@ will be disabled.)} assay to use for calculation. 
(Please use \code{assay.type} instead.)} -\item{tree_name}{a single \code{character} value for specifying which +\item{tree.name}{a single \code{character} value for specifying which tree will be used in calculation. -(By default: \code{tree_name = "phylo"})} +(By default: \code{tree.name = "phylo"})} + +\item{tree_name}{Deprecated. Use \code{tree.name} instead.} \item{transposed}{Logical scalar, is x transposed with cells in rows, i.e., is Unifrac distance calculated based on rows (FALSE) or columns (TRUE). (By default: \code{transposed = FALSE})} -\item{nodeLab}{if \code{x} is a matrix, +\item{node.label}{if \code{x} is a matrix, a \code{character} vector specifying links between rows/columns and tips of \code{tree}. The length must equal the number of rows/columns of \code{x}. Furthermore, all the node labs must be present in \code{tree}.} + +\item{nodeLab}{Deprecated. Use \code{node.label} instead.} } \value{ a sample-by-sample distance matrix, suitable for NMDS, etc. diff --git a/man/estimateDiversity.Rd b/man/estimateDiversity.Rd index 4180e0548..50aca40e0 100644 --- a/man/estimateDiversity.Rd +++ b/man/estimateDiversity.Rd @@ -37,6 +37,7 @@ estimateDiversity( index = c("coverage", "faith", "fisher", "gini_simpson", "inverse_simpson", "log_modulo_skewness", "shannon"), name = index, + tree.name = tree_name, tree_name = "phylo", ..., BPPARAM = SerialParam() @@ -57,6 +58,7 @@ estimateFaith( assay.type = "counts", assay_name = NULL, name = "faith", + node.label = node_lab, node_lab = NULL, ... ) @@ -66,6 +68,7 @@ estimateFaith( assay.type = "counts", assay_name = NULL, name = "faith", + tree.name = tree_name, tree_name = "phylo", ... ) @@ -98,9 +101,10 @@ determining the threshold for coverage index. By default, this quantile of the data. The assumption is that abundances higher than this are not common, and they are classified in their own group. By default, \code{quantile} is 0.5. 
-\item num_of_classes: The number of arithmetic abundance classes +\item nclasses: The number of arithmetic abundance classes from zero to the quantile cutoff indicated by \code{quantile}. -By default, \code{num_of_classes} is 50. +By default, \code{nclasses} is 50. +\item num_of_classes Deprecated. Use \code{nclasses} instead. \item only.tips: A boolean value specifying whether to remove internal nodes when Faith's index is calculated. When \code{only.tips=TRUE}, those rows that are not tips of tree are removed. @@ -111,18 +115,22 @@ rows that are not tips of tree are removed. \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}} object specifying whether calculation of estimates should be parallelized.} -\item{tree_name}{a single \code{character} value for specifying which +\item{tree.name}{a single \code{character} value for specifying which rowTree will be used to calculate faith index. -(By default: \code{tree_name = "phylo"})} +(By default: \code{tree.name = "phylo"})} + +\item{tree_name}{Deprecated. Use \code{tree.name} instead.} \item{tree}{A phylogenetic tree that is used to calculate 'faith' index. If \code{x} is a \code{TreeSummarizedExperiment}, \code{rowTree(x)} is used by default.} -\item{node_lab}{NULL or a character vector specifying the links between rows and +\item{node.label}{NULL or a character vector specifying the links between rows and node labels of \code{tree}. If a certain row is not linked with the tree, missing instance should be noted as NA. When NULL, all the rownames should be found from -the tree. (By default: \code{node_lab = NULL})} +the tree. (By default: \code{node.label = NULL})} + +\item{node_lab}{Deprecated.
Use \code{node.label} instead.} } \value{ \code{x} with additional \code{\link{colData}} named \code{*name*} @@ -218,10 +226,10 @@ colData(tse)[, index] <- NULL # 'threshold' can be used to determine threshold for 'coverage' index tse <- estimateDiversity(tse, index = "coverage", threshold = 0.75) -# 'quantile' and 'num_of_classes' can be used when +# 'quantile' and 'nclasses' can be used when # 'log_modulo_skewness' is calculated tse <- estimateDiversity(tse, index = "log_modulo_skewness", - quantile = 0.75, num_of_classes = 100) + quantile = 0.75, nclasses = 100) # It is recommended to specify also the final names used in the output. tse <- estimateDiversity(tse, diff --git a/man/getCrossAssociation.Rd b/man/getCrossAssociation.Rd index 179f32866..16cdb69f8 100644 --- a/man/getCrossAssociation.Rd +++ b/man/getCrossAssociation.Rd @@ -18,18 +18,26 @@ getCrossAssociation(x, ...) assay_name2 = "counts", altexp1 = NULL, altexp2 = NULL, + col.var1 = colData_variable1, colData_variable1 = NULL, + col.var2 = colData_variable2, colData_variable2 = NULL, MARGIN = 1, method = c("kendall", "spearman", "categorical", "pearson"), mode = "table", + p.adj.method = p_adj_method, p_adj_method = c("fdr", "BH", "bonferroni", "BY", "hochberg", "holm", "hommel", "none"), + p.adj.threshold = p_adj_threshold, p_adj_threshold = NULL, + cor.threshold = cor_threshold, cor_threshold = NULL, sort = FALSE, + filter.self.cor = filter_self_correlations, filter_self_correlations = FALSE, verbose = TRUE, + test.signif = test_significance, test_significance = FALSE, + show.warnings = show_warnings, show_warnings = TRUE, paired = FALSE, ... @@ -50,7 +58,7 @@ measure is symmetric or not. When \code{symmetric = TRUE}, associations are calculated only for unique variable-pairs, and they are assigned to corresponding variable-pair. This decreases the number of calculations in 2-fold meaning faster execution. 
(By default: \code{symmetric = FALSE}) -\item \code{association_FUN}: A function that is used to calculate (dis-)similarity +\item \code{association.fun}: A function that is used to calculate (dis-)similarity between features. Function must take matrix as an input and give numeric values as an output. Adjust \code{method} and other parameters correspondingly. Supported functions are, for example, \code{stats::dist} and \code{vegan::vegdist}. @@ -71,19 +79,13 @@ from\code{experiments(x)} of \code{MultiAssayExperiment} object or \code{\link[SummarizedExperiment:SummarizedExperiment-class]{assay}} of experiment 1 to be transformed. (By default: \code{assay.type1 = "counts"})} -\item{assay_name1}{a single \code{character} value for specifying which -assay of experiment 1 to use for calculation. -(Please use \code{assay.type1} instead. At some point \code{assay_name1} -will be disabled.)} +\item{assay_name1}{Deprecated. Use \code{assay.type1} instead.} \item{assay.type2}{A single character value for selecting the \code{\link[SummarizedExperiment:SummarizedExperiment-class]{assay}} of experiment 2 to be transformed. (By default: \code{assay.type2 = "counts"})} -\item{assay_name2}{a single \code{character} value for specifying which -assay of experiment 2 to use for calculation. -(Please use \code{assay.type2} instead. At some point \code{assay_name2} -will be disabled.)} +\item{assay_name2}{Deprecated. Use \code{assay.type2} instead.} \item{altexp1}{A single numeric or character value specifying alternative experiment from the altExp of experiment 1. If NULL, then the experiment is itself @@ -95,13 +97,17 @@ from the altExp of experiment 2. If NULL, then the experiment is itself and altExp option is disabled. (By default: \code{altexp2 = NULL})} -\item{colData_variable1}{A character value specifying column(s) from colData -of experiment 1. If colData_variable1 is used, assay.type1 is disabled. 
-(By default: \code{colData_variable1 = NULL})} +\item{col.var1}{A character value specifying column(s) from colData +of experiment 1. If col.var1 is used, assay.type1 is disabled. +(By default: \code{col.var1 = NULL})} -\item{colData_variable2}{A character value specifying column(s) from colData -of experiment 2. If colData_variable2 is used, assay.type2 is disabled. -(By default: \code{colData_variable2 = NULL})} +\item{colData_variable1}{Deprecated. Use \code{col.var1} instead.} + +\item{col.var2}{A character value specifying column(s) from colData +of experiment 2. If col.var2 is used, assay.type2 is disabled. +(By default: \code{col.var2 = NULL})} + +\item{colData_variable2}{Deprecated. Use \code{col.var2} instead.} \item{MARGIN}{A single numeric value for selecting if association are calculated row-wise / for features (1) or column-wise / for samples (2). Must be \code{1} or @@ -114,37 +120,49 @@ row-wise / for features (1) or column-wise / for samples (2). Must be \code{1} o \item{mode}{A single character value for selecting output format Available formats are 'table' and 'matrix'. (By default: \code{mode = "table"})} -\item{p_adj_method}{A single character value for selecting adjustment method of +\item{p.adj.method}{A single character value for selecting adjustment method of p-values. Passed to \code{p.adjust} function. -(By default: \code{p_adj_method = "fdr"})} +(By default: \code{p.adj.method = "fdr"})} -\item{p_adj_threshold}{A single numeric value (from 0 to 1) for selecting +\item{p_adj_method}{Deprecated. Use \code{p.adj.method} instead.} + +\item{p.adj.threshold}{A single numeric value (from 0 to 1) for selecting adjusted p-value threshold for filtering. -(By default: \code{p_adj_threshold = NULL})} +(By default: \code{p.adj.threshold = NULL})} + +\item{p_adj_threshold}{Deprecated.
Use \code{p.adj.threshold} instead.} -\item{cor_threshold}{A single numeric absolute value (from 0 to 1) for selecting +\item{cor.threshold}{A single numeric absolute value (from 0 to 1) for selecting correlation threshold for filtering. -(By default: \code{cor_threshold = NULL})} +(By default: \code{cor.threshold = NULL})} + +\item{cor_threshold}{Deprecated. Use \code{cor.threshold} instead.} \item{sort}{A single boolean value for selecting whether to sort features or not in result matrices. Used method is hierarchical clustering. (By default: \code{sort = FALSE})} -\item{filter_self_correlations}{A single boolean value for selecting whether to +\item{filter.self.cor}{A single boolean value for selecting whether to filter out correlations between identical items. Applies only when correlation between experiment itself is tested, i.e., when assays are identical. -(By default: \code{filter_self_correlations = FALSE})} +(By default: \code{filter.self.cor = FALSE})} + +\item{filter_self_correlations}{Deprecated. Use \code{filter.self.cor} instead.} \item{verbose}{A single boolean value for selecting whether to get messages about progress of calculation.} -\item{test_significance}{A single boolean value for selecting whether to test +\item{test.signif}{A single boolean value for selecting whether to test statistical significance of associations. -(By default: \code{test_significance = FALSE})} +(By default: \code{test.signif = FALSE})} -\item{show_warnings}{A single boolean value for selecting whether to show warnings +\item{test_significance}{Deprecated. Use \code{test.signif} instead.} + +\item{show.warnings}{A single boolean value for selecting whether to show warnings that might occur when correlations and p-values are calculated.} +\item{show_warnings}{Deprecated. Use \code{show.warnings} instead.} + \item{paired}{A single boolean value for specifying if samples are paired or not. \code{colnames} must match between twp experiments.
\code{paired} is disabled when \code{MARGIN = 1}. (By default: \code{paired = FALSE})} @@ -163,7 +181,7 @@ Calculate correlations between features of two experiments. The function \code{getCrossAssociation} calculates associations between features of two experiments. By default, it not only computes associations but also tests their significance. If desired, setting -\code{test_significance} to FALSE disables significance calculation. +\code{test.signif} to FALSE disables significance calculation. We recommend the non-parametric Kendall's tau as the default method for association analysis. Kendall's tau has desirable statistical properties and robustness at lower @@ -200,15 +218,15 @@ result <- getCrossAssociation(mae, experiment2 = 2, # Show first 5 entries head(result, 5) -# If test_significance = TRUE, then getCrossAssociation additionally returns +# If test.signif = TRUE, then getCrossAssociation additionally returns # significances -# filter_self_correlations = TRUE filters self correlations -# p_adj_threshold can be used to filter those features that do not +# filter.self.cor = TRUE filters self correlations +# p.adj.threshold can be used to filter those features that do not # have any correlations whose p-value is lower than the threshold result <- getCrossAssociation(mae[[1]], experiment2 = mae[[1]], method = "pearson", - filter_self_correlations = TRUE, - p_adj_threshold = 0.05, - test_significance = TRUE) + filter.self.cor = TRUE, + p.adj.threshold = 0.05, + test.signif = TRUE) # Show first 5 entries head(result, 5) @@ -218,7 +236,7 @@ names(result) # Calculate Bray-Curtis dissimilarity between samples. If dataset includes # paired samples, you can use paired = TRUE. 
result <- getCrossAssociation(mae[[1]], mae[[1]], MARGIN = 2, paired = FALSE, - association_FUN = vegan::vegdist, + association.fun = vegan::vegdist, method = "bray") @@ -248,8 +266,8 @@ mae[[1]] <- estimateDiversity(mae[[1]]) # named assay.type from assay slot, it fetches a column named colData_variable # from colData. result <- getCrossAssociation(mae[[1]], assay.type1 = "counts", - colData_variable2 = c("shannon", "coverage"), - test_significance = TRUE) + col.var2 = c("shannon", "coverage"), + test.signif = TRUE) } \author{ diff --git a/man/getPrevalence.Rd b/man/getPrevalence.Rd index f60c800a5..bb5a14ad2 100644 --- a/man/getPrevalence.Rd +++ b/man/getPrevalence.Rd @@ -24,6 +24,7 @@ getPrevalence(x, ...) \S4method{getPrevalence}{ANY}( x, detection = 0, + include.lowest = include_lowest, include_lowest = FALSE, sort = FALSE, na.rm = TRUE, @@ -34,6 +35,7 @@ getPrevalence(x, ...) x, assay.type = assay_name, assay_name = "counts", + as.relative = as_relative, as_relative = FALSE, rank = NULL, ... @@ -41,15 +43,41 @@ getPrevalence(x, ...) getPrevalent(x, ...) -\S4method{getPrevalent}{ANY}(x, prevalence = 50/100, include_lowest = FALSE, ...) +\S4method{getPrevalent}{ANY}( + x, + prevalence = 50/100, + include.lowest = include_lowest, + include_lowest = FALSE, + ... +) -\S4method{getPrevalent}{SummarizedExperiment}(x, rank = NULL, prevalence = 50/100, include_lowest = FALSE, ...) +\S4method{getPrevalent}{SummarizedExperiment}( + x, + rank = NULL, + prevalence = 50/100, + include.lowest = include_lowest, + include_lowest = FALSE, + ... +) getRare(x, ...) -\S4method{getRare}{ANY}(x, prevalence = 50/100, include_lowest = FALSE, ...) +\S4method{getRare}{ANY}( + x, + prevalence = 50/100, + include.lowest = include_lowest, + include_lowest = FALSE, + ... +) -\S4method{getRare}{SummarizedExperiment}(x, rank = NULL, prevalence = 50/100, include_lowest = FALSE, ...) 
+\S4method{getRare}{SummarizedExperiment}( + x, + rank = NULL, + prevalence = 50/100, + include.lowest = include_lowest, + include_lowest = FALSE, + ... +) subsetByPrevalent(x, ...) @@ -98,11 +126,13 @@ and \code{subsetByRare} additional parameters passed to \item{detection}{Detection threshold for absence/presence. Either an absolute value compared directly to the values of \code{x} or a relative -value between 0 and 1, if \code{as_relative = FALSE}.} +value between 0 and 1, if \code{as.relative = FALSE}.} -\item{include_lowest}{logical scalar: Should the lower boundary of the +\item{include.lowest}{logical scalar: Should the lower boundary of the detection and prevalence cutoffs be included? (default: \code{FALSE})} +\item{include_lowest}{Deprecated. Use \code{include.lowest} instead.} + \item{sort}{logical scalar: Should the result be sorted by prevalence? (default: \code{FALSE})} @@ -118,15 +148,17 @@ assay to use for calculation. (Please use \code{assay.type} instead. At some point \code{assay_name} will be disabled.)} -\item{as_relative}{logical scalar: Should the detection threshold be applied +\item{as.relative}{logical scalar: Should the detection threshold be applied on compositional (relative) abundances? (default: \code{FALSE})} +\item{as_relative}{Deprecated. Use \code{as.relative} instead.} + \item{rank}{a single character defining a taxonomic rank. Must be a value of \code{taxonomyRanks()} function.} \item{prevalence}{Prevalence threshold (in 0 to 1). The required prevalence is strictly greater by default. To include the -limit, set \code{include_lowest} to \code{TRUE}.} +limit, set \code{include.lowest} to \code{TRUE}.} } \value{ \code{subsetPrevalent} and \code{subsetRareFeatures} return subset of @@ -158,7 +190,7 @@ These functions calculate the population prevalence for taxonomic ranks in a the detection threshold. For \code{SummarizedExperiment} objects, the prevalence is calculated for the selected taxonomic rank, otherwise for the rows. 
The absolute population prevalence can be obtained by multiplying the -prevalence by the number of samples (\code{ncol(x)}). If \code{as_relative = +prevalence by the number of samples (\code{ncol(x)}). If \code{as.relative = FALSE} the relative frequency (between 0 and 1) is used to check against the \code{detection} threshold. @@ -184,7 +216,7 @@ tse <- GlobalPatterns prevalence.frequency <- getPrevalence(tse, detection = 0, sort = TRUE, - as_relative = TRUE) + as.relative = TRUE) head(prevalence.frequency) # Get prevalence estimates for phylums @@ -193,7 +225,7 @@ prevalence.frequency <- getPrevalence(tse, rank = "Phylum", detection = 0, sort = TRUE, - as_relative = TRUE) + as.relative = TRUE) head(prevalence.frequency) # - to obtain population counts, multiply frequencies with the sample size, @@ -209,7 +241,7 @@ prevalent <- getPrevalent(tse, rank = "Phylum", detection = 10, prevalence = 50/100, - as_relative = FALSE) + as.relative = FALSE) head(prevalent) # Gets a subset of object that includes prevalent taxa @@ -217,7 +249,7 @@ altExp(tse, "prevalent") <- subsetByPrevalent(tse, rank = "Family", detection = 0.001, prevalence = 0.55, - as_relative = TRUE) + as.relative = TRUE) altExp(tse, "prevalent") # getRare returns the inverse @@ -225,7 +257,7 @@ rare <- getRare(tse, rank = "Phylum", detection = 1/100, prevalence = 50/100, - as_relative = TRUE) + as.relative = TRUE) head(rare) # Gets a subset of object that includes rare taxa @@ -233,7 +265,7 @@ altExp(tse, "rare") <- subsetByRare(tse, rank = "Class", detection = 0.001, prevalence = 0.001, - as_relative = TRUE) + as.relative = TRUE) altExp(tse, "rare") # Names of both experiments, prevalent and rare, can be found from slot diff --git a/man/importHUMAnN.Rd b/man/importHUMAnN.Rd index 49e03660c..603953cc3 100644 --- a/man/importHUMAnN.Rd +++ b/man/importHUMAnN.Rd @@ -7,19 +7,21 @@ \item{file}{a single \code{character} value defining the file path of the HUMAnN file. 
The file must be in merged HUMAnN format.} -\item{colData}{a DataFrame-like object that includes sample names in +\item{col.data}{a DataFrame-like object that includes sample names in rownames, or a single \code{character} value defining the file path of the sample metadata file. The file must be in \code{tsv} format -(default: \code{colData = NULL}).} +(default: \code{col.data = NULL}).} + +\item{colData}{Deprecated. Use \code{col.data} instead.} \item{...}{additional arguments: \itemize{ \item \code{assay.type}: A single character value for naming \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{assay}} (default: \code{assay.type = "counts"}) -\item \code{removeTaxaPrefixes}: \code{TRUE} or \code{FALSE}: Should +\item \code{prefix.rm}: \code{TRUE} or \code{FALSE}: Should taxonomic prefixes be removed? (default: -\code{removeTaxaPrefixes = FALSE}) +\code{prefix.rm = FALSE}) \item \code{remove.suffix}: \code{TRUE} or \code{FALSE}: Should suffixes of sample names be removed? HUMAnN pipeline adds suffixes to sample names. Suffixes are formed from file names. By selecting diff --git a/man/importMetaPhlAn.Rd b/man/importMetaPhlAn.Rd index c94920ca0..5b7c1f0d8 100644 --- a/man/importMetaPhlAn.Rd +++ b/man/importMetaPhlAn.Rd @@ -7,19 +7,20 @@ \item{file}{a single \code{character} value defining the file path of the Metaphlan file. The file must be in merged Metaphlan format.} -\item{colData}{a DataFrame-like object that includes sample names in +\item{col.data}{a DataFrame-like object that includes sample names in rownames, or a single \code{character} value defining the file path of the sample metadata file. The file must be in \code{tsv} format -(default: \code{colData = NULL}).} +(default: \code{col.data = NULL}).} -\item{sample_meta}{a DataFrame-like object that includes sample names in -rownames, or a single \code{character} value defining the file -path of the sample metadata file. 
The file must be in \code{tsv} format -(default: \code{sample_meta = NULL}).} +\item{colData}{Deprecated. Use \code{col.data} instead.} -\item{phy_tree}{a single \code{character} value defining the file +\item{sample_meta}{Deprecated. Use \code{col.data} instead.} + +\item{tree.file}{a single \code{character} value defining the file path of the phylogenetic tree. -(default: \code{phy_tree = NULL}).} +(default: \code{tree.file = NULL}).} + +\item{phy_tree}{Deprecated. Use \code{tree.file} instead.} \item{...}{additional arguments: \itemize{ @@ -29,9 +30,9 @@ path of the phylogenetic tree. \item \code{assay_name}: A single \code{character} value for specifying which assay to use for calculation. (Please use \code{assay.type} instead. At some point \code{assay_name} will be disabled.) -\item \code{removeTaxaPrefixes}: \code{TRUE} or \code{FALSE}: Should +\item \code{prefix.rm}: \code{TRUE} or \code{FALSE}: Should taxonomic prefixes be removed? (default: -\code{removeTaxaPrefixes = FALSE}) +\code{prefix.rm = FALSE}) \item \code{remove.suffix}: \code{TRUE} or \code{FALSE}: Should suffixes of sample names be removed? Metaphlan pipeline adds suffixes to sample names. Suffixes are formed from file names. By selecting diff --git a/man/importMothur.Rd b/man/importMothur.Rd index 297d6357c..5e8561b02 100644 --- a/man/importMothur.Rd +++ b/man/importMothur.Rd @@ -4,21 +4,34 @@ \alias{importMothur} \title{Import Mothur results as a \code{TreeSummarizedExperiment}} \usage{ -importMothur(sharedFile, taxonomyFile = NULL, designFile = NULL) +importMothur( + assay.file = sharedFile, + sharedFile, + taxonomyFile = NULL, + row.file = taxonomyFile, + designFile = NULL, + col.file = designFile +) } \arguments{ -\item{sharedFile}{a single \code{character} value defining the file +\item{assay.file}{a single \code{character} value defining the file path of the feature table to be imported.
The File has to be in \code{shared file} format as defined in Mothur documentation.} -\item{taxonomyFile}{a single \code{character} value defining the file path of +\item{sharedFile}{Deprecated. Use \code{assay.file} instead.} + +\item{taxonomyFile}{Deprecated. Use \code{row.file} instead.} + +\item{row.file}{a single \code{character} value defining the file path of the taxonomy table to be imported. The File has to be in \code{taxonomy file} or \code{constaxonomy file} format as defined in Mothur -documentation. (default: \code{taxonomyFile = NULL}).} +documentation. (default: \code{row.file = NULL}).} + +\item{designFile}{Deprecated. Use \code{col.file} instead.} -\item{designFile}{a single \code{character} value defining the file path of +\item{col.file}{a single \code{character} value defining the file path of the sample metadata to be imported. The File has to be in \code{desing - file} format as defined in Mothur documentation. (default: \code{designFile + file} format as defined in Mothur documentation. (default: \code{col.file = NULL}).} } \value{ @@ -33,8 +46,8 @@ files provided as input. \details{ Results exported from Mothur can be imported as a \code{SummarizedExperiment} using \code{importMothur}. Except for the -\code{sharedFile}, the other data types, \code{taxonomyFile}, and -\code{designFile}, are optional, but are highly encouraged to be provided. +\code{assay.file}, the other data types, \code{row.file}, and +\code{col.file}, are optional, but are highly encouraged to be provided. 
} \examples{ # Abundance table @@ -46,7 +59,7 @@ taxa <- system.file("extdata", "mothur_example.cons.taxonomy", package = "mia") meta <- system.file("extdata", "mothur_example.design", package = "mia") # Creates se object from files -se <- importMothur(counts, taxa, meta) +se <- importMothur(assay.file = counts, row.file = taxa, col.file = meta) # Convert SE to TreeSE tse <- as(se, "TreeSummarizedExperiment") tse diff --git a/man/importQIIME2.Rd b/man/importQIIME2.Rd index 7ece59bcd..eb63b21e8 100644 --- a/man/importQIIME2.Rd +++ b/man/importQIIME2.Rd @@ -6,49 +6,67 @@ \title{Import QIIME2 results to \code{TreeSummarizedExperiment}} \usage{ importQIIME2( + assay.file = featureTableFile, featureTableFile, + row.file = taxonomyTableFile, taxonomyTableFile = NULL, + col.file = sampleMetaFile, sampleMetaFile = NULL, + as.refseq = featureNamesAsRefSeq, featureNamesAsRefSeq = TRUE, + refseq.file = refSeqFile, refSeqFile = NULL, + tree.file = phyTreeFile, phyTreeFile = NULL, ... ) -importQZA(file, temp = tempdir(), ...) +importQZA(file, temp.dir = temp, temp = tempdir(), ...) } \arguments{ -\item{featureTableFile}{a single \code{character} value defining the file +\item{assay.file}{a single \code{character} value defining the file path of the feature table to be imported.} -\item{taxonomyTableFile}{a single \code{character} value defining the file +\item{featureTableFile}{Deprecated. Use \code{assay.file} instead.} + +\item{row.file}{a single \code{character} value defining the file path of the taxonomy table to be imported. (default: -\code{taxonomyTableFile = NULL}).} +\code{row.file = NULL}).} + +\item{taxonomyTableFile}{Deprecated. Use \code{row.file} instead.} -\item{sampleMetaFile}{a single \code{character} value defining the file path +\item{col.file}{a single \code{character} value defining the file path of the sample metadata to be imported. The file has to be in tsv format.
-(default: \code{sampleMetaFile = NULL}).} +(default: \code{col.file = NULL}).} -\item{featureNamesAsRefSeq}{\code{TRUE} or \code{FALSE}: Should the feature +\item{sampleMetaFile}{Deprecated. Use \code{col.file} instead.} + +\item{as.refseq}{\code{TRUE} or \code{FALSE}: Should the feature names of the feature table be regarded as reference sequences? This setting -will be disregarded, if \code{refSeqFile} is not \code{NULL}. If the +will be disregarded, if \code{refseq.file} is not \code{NULL}. If the feature names do not contain valid DNA characters only, the reference sequences will not be set.} -\item{refSeqFile}{a single \code{character} value defining the file path of -the reference sequences for each feature. (default: \code{refSeqFile = +\item{featureNamesAsRefSeq}{Deprecated. Use \code{as.refseq} instead.} + +\item{refseq.file}{a single \code{character} value defining the file path of +the reference sequences for each feature. (default: \code{refseq.file = NULL}).} -\item{phyTreeFile}{a single \code{character} value defining the file path of -the phylogenetic tree. (default: \code{phyTreeFile = NULL}).} +\item{refSeqFile}{Deprecated. Use \code{refseq.file} instead.} + +\item{tree.file}{a single \code{character} value defining the file path of +the phylogenetic tree. (default: \code{tree.file = NULL}).} + +\item{phyTreeFile}{Deprecated. Use \code{tree.file} instead.} \item{...}{additional arguments: \itemize{ -\item \code{temp}: the temporary directory used for decompressing the +\item \code{temp.dir}: the temporary directory used for decompressing the data. (default: \code{tempdir()}) -\item \code{removeTaxaPrefixes}: \code{TRUE} or \code{FALSE}: Should +\item \code{prefix.rm}: \code{TRUE} or \code{FALSE}: Should taxonomic prefixes be removed? (default: -\code{removeTaxaPrefixes = FALSE}) +\code{prefix.rm = FALSE}) }} \item{file}{character, path of the input qza file.
Only files in format of @@ -57,8 +75,10 @@ table), \code{NewickDirectoryFormat} (phylogenetic tree ) and \code{DNASequencesDirectoryFormat} (representative sequences) are supported right now.} -\item{temp}{character, a temporary directory in which the qza file will be +\item{temp.dir}{character, a temporary directory in which the qza file will be decompressed to, default \code{tempdir()}.} + +\item{temp}{Deprecated. Use \code{temp.dir} instead.} } \value{ A @@ -72,42 +92,42 @@ object \description{ Results exported from QIMME2 can be imported as a \code{TreeSummarizedExperiment} using \code{importQIIME2}. Except for the -\code{featureTableFile}, the other data types, \code{taxonomyTableFile}, -\code{refSeqFile} and \code{phyTreeFile}, are optional, but are highly +\code{assay.file}, the other data types, \code{row.file}, +\code{refseq.file} and \code{tree.file}, are optional, but are highly encouraged to be provided. Import the QIIME2 artifacts to R. } \details{ -Both arguments \code{featureNamesAsRefSeq} and \code{refSeqFile} can be used -to define reference sequences of features. \code{featureNamesAsRefSeq} is -only taken into account, if \code{refSeqFile} is \code{NULL}. No reference +Both arguments \code{as.refseq} and \code{refseq.file} can be used +to define reference sequences of features. \code{as.refseq} is +only taken into account, if \code{refseq.file} is \code{NULL}. No reference sequences are tried to be created, if \code{featureNameAsRefSeq} is -\code{FALSE} and \code{refSeqFile} is \code{NULL}. +\code{FALSE} and \code{refseq.file} is \code{NULL}.
} \examples{ -featureTableFile <- system.file("extdata", "table.qza", package = "mia") -taxonomyTableFile <- system.file("extdata", "taxonomy.qza", package = "mia") -sampleMetaFile <- system.file("extdata", "sample-metadata.tsv", package = "mia") -phyTreeFile <- system.file("extdata", "tree.qza", package = "mia") -refSeqFile <- system.file("extdata", "refseq.qza", package = "mia") +assay.file <- system.file("extdata", "table.qza", package = "mia") +row.file <- system.file("extdata", "taxonomy.qza", package = "mia") +col.file <- system.file("extdata", "sample-metadata.tsv", package = "mia") +tree.file <- system.file("extdata", "tree.qza", package = "mia") +refseq.file <- system.file("extdata", "refseq.qza", package = "mia") tse <- importQIIME2( - featureTableFile = featureTableFile, - taxonomyTableFile = taxonomyTableFile, - sampleMetaFile = sampleMetaFile, - refSeqFile = refSeqFile, - phyTreeFile = phyTreeFile + assay.file = assay.file, + row.file = row.file, + col.file = col.file, + refseq.file = refseq.file, + tree.file = tree.file ) tse # Read individual files -featureTableFile <- system.file("extdata", "table.qza", package = "mia") -taxonomyTableFile <- system.file("extdata", "taxonomy.qza", package = "mia") -sampleMetaFile <- system.file("extdata", "sample-metadata.tsv", package = "mia") +assay.file <- system.file("extdata", "table.qza", package = "mia") +row.file <- system.file("extdata", "taxonomy.qza", package = "mia") +col.file <- system.file("extdata", "sample-metadata.tsv", package = "mia") -assay <- importQZA(featureTableFile) -rowdata <- importQZA(taxonomyTableFile, removeTaxaPrefixes = TRUE) -coldata <- read.table(sampleMetaFile, header = TRUE, sep = "\t", comment.char = "") +assay <- importQZA(assay.file) +rowdata <- importQZA(row.file, prefix.rm = TRUE) +coldata <- read.table(col.file, header = TRUE, sep = "\t", comment.char = "") # Assign rownames rownames(coldata) <- coldata[, 1] diff --git a/man/makePhyloseqFromTreeSE.Rd 
b/man/makePhyloseqFromTreeSE.Rd index bcac72f0a..37f472be3 100644 --- a/man/makePhyloseqFromTreeSE.Rd +++ b/man/makePhyloseqFromTreeSE.Rd @@ -12,7 +12,7 @@ makePhyloseqFromTreeSE(x, ...) \S4method{makePhyloseqFromTreeSE}{SummarizedExperiment}(x, assay.type = "counts", assay_name = NULL, ...) -\S4method{makePhyloseqFromTreeSE}{TreeSummarizedExperiment}(x, tree_name = "phylo", ...) +\S4method{makePhyloseqFromTreeSE}{TreeSummarizedExperiment}(x, tree.name = tree_name, tree_name = "phylo", ...) makePhyloseqFromTreeSummarizedExperiment(x, ...) @@ -33,9 +33,11 @@ assay to use for calculation. (Please use \code{assay.type} instead. At some point \code{assay_name} will be disabled.)} -\item{tree_name}{a single \code{character} value for specifying which +\item{tree.name}{a single \code{character} value for specifying which tree will be included in the phyloseq object that is created, -(By default: \code{tree_name = "phylo"})} +(By default: \code{tree.name = "phylo"})} + +\item{tree_name}{Deprecated. Use \code{tree.name} instead.} } \value{ An object of class \code{Phyloseq} object. diff --git a/man/makeTreeSEFromBiom.Rd b/man/makeTreeSEFromBiom.Rd index 8b7ab99f3..6fb71bc8f 100644 --- a/man/makeTreeSEFromBiom.Rd +++ b/man/makeTreeSEFromBiom.Rd @@ -9,14 +9,17 @@ importBIOM(file, ...) makeTreeSEFromBiom( - obj, + x, + prefix.rm = removeTaxaPrefixes, removeTaxaPrefixes = FALSE, + rank.from.prefix = rankFromPrefix, rankFromPrefix = FALSE, + artifact.rm = remove.artifacts, remove.artifacts = FALSE, ... ) -makeTreeSummarizedExperimentFromBiom(obj, ...) +makeTreeSummarizedExperimentFromBiom(x, ...) } \arguments{ \item{file}{biom file location} @@ -28,20 +31,26 @@ to be removed. If \code{patterns = "auto"}, special characters are removed. 
(default: \code{pattern = "auto"}) }} -\item{obj}{object of type \code{\link[biomformat:read_biom]{biom}}} +\item{x}{object of type \code{\link[biomformat:read_biom]{biom}}} -\item{removeTaxaPrefixes}{\code{TRUE} or \code{FALSE}: Should +\item{prefix.rm}{\code{TRUE} or \code{FALSE}: Should taxonomic prefixes be removed? The prefixes is removed only from detected -taxa columns meaning that \code{rankFromPrefix} should be enabled in the most cases. -(default \code{removeTaxaPrefixes = FALSE})} +taxa columns meaning that \code{rank.from.prefix} should be enabled in most cases. +(default \code{prefix.rm = FALSE})} -\item{rankFromPrefix}{\code{TRUE} or \code{FALSE}: If file does not have +\item{removeTaxaPrefixes}{Deprecated. Use \code{prefix.rm} instead.} + +\item{rank.from.prefix}{\code{TRUE} or \code{FALSE}: If file does not have taxonomic ranks on feature table, should they be scraped from prefixes? -(default \code{rankFromPrefix = FALSE})} +(default \code{rank.from.prefix = FALSE})} + +\item{rankFromPrefix}{Deprecated. Use \code{rank.from.prefix} instead.} -\item{remove.artifacts}{\code{TRUE} or \code{FALSE}: If file have +\item{artifact.rm}{\code{TRUE} or \code{FALSE}: If the file has some taxonomic character naming artifacts, should they be removed. -(default \code{remove.artifacts = FALSE})} +(default \code{artifact.rm = FALSE})} + +\item{remove.artifacts}{Deprecated.
Use \code{artifact.rm} instead.} } \value{ An object of class @@ -67,8 +76,8 @@ tse <- makeTreeSEFromBiom(biom_object) # Get taxonomyRanks from prefixes and remove prefixes tse <- importBIOM(biom_file, - rankFromPrefix = TRUE, - removeTaxaPrefixes = TRUE) + rank.from.prefix = TRUE, + prefix.rm = TRUE) # Load another biom file biom_file <- system.file("extdata/testdata", "Aggregated_humanization2.biom", @@ -76,7 +85,7 @@ biom_file <- system.file("extdata/testdata", "Aggregated_humanization2.biom", # Clean artifacts from taxonomic data tse <- importBIOM(biom_file, - remove.artifacts = TRUE) + artifact.rm = TRUE) } \seealso{ \code{\link[=makeTreeSEFromPhyloseq]{makeTreeSEFromPhyloseq}} diff --git a/man/makeTreeSEFromPhyloseq.Rd b/man/makeTreeSEFromPhyloseq.Rd index 7c46db4fd..f629642e7 100644 --- a/man/makeTreeSEFromPhyloseq.Rd +++ b/man/makeTreeSEFromPhyloseq.Rd @@ -6,14 +6,14 @@ \alias{makeTreeSummarizedExperimentFromPhyloseq,ANY-method} \title{Coerce a \code{phyloseq} object to a \code{TreeSummarizedExperiment}} \usage{ -makeTreeSEFromPhyloseq(obj) +makeTreeSEFromPhyloseq(x) -makeTreeSummarizedExperimentFromPhyloseq(obj) +makeTreeSummarizedExperimentFromPhyloseq(x) -\S4method{makeTreeSummarizedExperimentFromPhyloseq}{ANY}(obj) +\S4method{makeTreeSummarizedExperimentFromPhyloseq}{ANY}(x) } \arguments{ -\item{obj}{a \code{phyloseq} object} +\item{x}{a \code{phyloseq} object} } \value{ An object of class \code{TreeSummarizedExperiment} diff --git a/man/meltSE.Rd b/man/meltSE.Rd index 3ac5095f5..d367a46ac 100644 --- a/man/meltSE.Rd +++ b/man/meltSE.Rd @@ -10,9 +10,13 @@ meltSE( x, assay.type = assay_name, assay_name = "counts", + add.row = add_row_data, add_row_data = NULL, + add.col = add_col_data, add_col_data = NULL, + row.name = feature_name, feature_name = "FeatureID", + col.name = sample_name, sample_name = "SampleID", ... 
) @@ -21,9 +25,13 @@ meltSE( x, assay.type = assay_name, assay_name = "counts", + add.row = add_row_data, add_row_data = NULL, + add.col = add_col_data, add_col_data = NULL, + row.name = feature_name, feature_name = "FeatureID", + col.name = sample_name, sample_name = "SampleID", ... ) @@ -40,27 +48,35 @@ assay to use for calculation. (Please use \code{assay.type} instead. At some point \code{assay_name} will be disabled.)} -\item{add_row_data}{\code{NULL}, \code{TRUE} or a \code{character} vector to +\item{add.row}{\code{NULL}, \code{TRUE} or a \code{character} vector to select information from the \code{rowData} to add to the molten assay data. -If \code{add_row_data = NULL} no data will be added, if -\code{add_row_data = TRUE} all data will be added and if -\code{add_row_data} is a \code{character} vector, it will be used to subset +If \code{add.row = NULL} no data will be added, if +\code{add.row = TRUE} all data will be added and if +\code{add.row} is a \code{character} vector, it will be used to subset to given column names in \code{rowData}. (default: -\code{add_row_data = NULL})} +\code{add.row = NULL})} -\item{add_col_data}{\code{NULL}, \code{TRUE} or a \code{character} vector to +\item{add_row_data}{Deprecated. Use \code{add.row} instead.} + +\item{add.col}{\code{NULL}, \code{TRUE} or a \code{character} vector to select information from the \code{colData} to add to the molten assay data. -If \code{add_col_data = NULL} no data will be added, if -\code{add_col_data = TRUE} all data will be added and if -\code{add_col_data} is a \code{character} vector, it will be used to subset +If \code{add.col = NULL} no data will be added, if +\code{add.col = TRUE} all data will be added and if +\code{add.col} is a \code{character} vector, it will be used to subset to given column names in \code{colData}. (default: -\code{add_col_data = NULL})} +\code{add.col = NULL})} + +\item{add_col_data}{Deprecated. 
Use \code{add.col} instead.} + +\item{row.name}{a \code{character} scalar to use as the output's name +for the feature identifier. (default: \code{row.name = "FeatureID"})} + +\item{feature_name}{Deprecated. Use \code{row.name} instead.} -\item{feature_name}{a \code{character} scalar to use as the output's name -for the feature identifier. (default: \code{feature_name = "FeatureID"})} +\item{col.name}{a \code{character} scalar to use as the output's name +for the sample identifier. (default: \code{col.name = "SampleID"})} -\item{sample_name}{a \code{character} scalar to use as the output's name -for the sample identifier. (default: \code{sample_name = "SampleID"})} +\item{sample_name}{Deprecated. Use \code{col.name} instead.} \item{...}{optional arguments: \itemize{ @@ -91,8 +107,8 @@ names are set. data(GlobalPatterns) molten_tse <- meltSE(GlobalPatterns, assay.type = "counts", - add_row_data = TRUE, - add_col_data = TRUE + add.row = TRUE, + add.col = TRUE ) molten_tse } diff --git a/man/mergeSEs.Rd b/man/mergeSEs.Rd index b65eeb54e..fc675a771 100644 --- a/man/mergeSEs.Rd +++ b/man/mergeSEs.Rd @@ -14,8 +14,11 @@ mergeSEs(x, ...) assay.type = "counts", assay_name = NULL, join = "full", + missing.values = missing_values, missing_values = NA, + collapse.cols = collapse_samples, collapse_samples = FALSE, + collapse.rows = collapse_features, collapse_features = TRUE, verbose = TRUE, ... @@ -41,20 +44,26 @@ to be merged. (By default: \code{assay.type = "counts"})} Must be 'full', 'inner', 'left', or 'right'. 'left' and 'right' are disabled when more than two objects are being merged. (By default: \code{join = "full"})} -\item{missing_values}{NA, 0, or a single character values specifying the notation -of missing values. (By default: \code{missing_values = NA})} +\item{missing.values}{NA, 0, or a single character values specifying the notation +of missing values. 
(By default: \code{missing.values = NA})} -\item{collapse_samples}{A boolean value for selecting whether to collapse identically -named samples to one. (By default: \code{collapse_samples = FALSE})} +\item{missing_values}{Deprecated. Use \code{missing.values} instead.} -\item{collapse_features}{A boolean value for selecting whether to collapse identically +\item{collapse.cols}{A boolean value for selecting whether to collapse identically +named samples to one. (By default: \code{collapse.cols = FALSE})} + +\item{collapse_samples}{Deprecated. Use \code{collapse.cols} instead.} + +\item{collapse.rows}{A boolean value for selecting whether to collapse identically named features to one. Since all taxonomy information is taken into account, this concerns rownames-level (usually strain level) comparison. Often OTU or ASV level is just an arbitrary number series from sequencing machine meaning that the OTU information is not comparable between studies. With this option, it is possible to specify whether these strains are combined if their taxonomy information along with OTU number matches. -(By default: \code{collapse_features = TRUE})} +(By default: \code{collapse.rows = TRUE})} + +\item{collapse_features}{Deprecated. Use \code{collapse.rows} instead.} \item{verbose}{A single boolean value to choose whether to show messages. (By default: \code{verbose = TRUE})} @@ -79,12 +88,12 @@ Equally named rows are interpreted as equal. Further matching based on \code{rowData} is not done. For samples, collapsing is disabled by default meaning that equally named samples that are stored in different objects are interpreted as unique. Collapsing can be enabled -with \code{collapse_samples = TRUE} when equally named samples describe the same +with \code{collapse.cols = TRUE} when equally named samples describe the same sample. If, for example, all rows are not shared with individual objects, there are missing values in \code{assays}. 
The notation of missing -can be specified with the \code{missing_values} argument. If input consists of +can be specified with the \code{missing.values} argument. If input consists of \code{TreeSummarizedExperiment} objects, also \code{rowTree}, \code{colTree}, and \code{referenceSeq} are preserved if possible. The data is preserved if all the rows or columns can be found from it. @@ -125,7 +134,7 @@ tse <- mergeSEs(tse1, tse2) # Merge a list of TreeSEs list <- SimpleList(tse1, tse2, tse3) -tse <- mergeSEs(list, assay.type = "counts", missing_values = 0) +tse <- mergeSEs(list, assay.type = "counts", missing.values = 0) tse # With 'join', it is possible to specify the merging method. Subsets are used @@ -134,9 +143,9 @@ tse_temp <- mergeSEs(tse[1:10, 1:10], tse[5:100, 11:20], join = "left") tse_temp # If your objects contain samples that describe one and same sample, -# you can collapse equally named samples to one by specifying 'collapse_samples' +# you can collapse equally named samples to one by specifying 'collapse.cols' tse_temp <- mergeSEs(list(tse[1:10, 1], tse[1:20, 1], tse[1:5, 1]), - collapse_samples = TRUE, + collapse.cols = TRUE, join = "inner") tse_temp diff --git a/man/rarefyAssay.Rd b/man/rarefyAssay.Rd index 060152c3f..463eb8f48 100644 --- a/man/rarefyAssay.Rd +++ b/man/rarefyAssay.Rd @@ -9,6 +9,7 @@ rarefyAssay( x, assay.type = assay_name, assay_name = "counts", + sample = min_size, min_size = min(colSums2(assay(x))), replace = TRUE, name = "subsampled", @@ -20,6 +21,7 @@ rarefyAssay( x, assay.type = assay_name, assay_name = "counts", + sample = min_size, min_size = min(colSums2(assay(x))), replace = TRUE, name = "subsampled", @@ -41,10 +43,12 @@ assay to use for calculation. (Please use \code{assay.type} instead. 
At some point \code{assay_name} will be disabled.)} -\item{min_size}{A single integer value equal to the number of counts being +\item{sample}{A single integer value equal to the number of counts being simulated this can equal to lowest number of total counts found in a sample or a user specified number.} +\item{min_size}{Deprecated. Use \code{sample} instead.} + \item{replace}{Logical Default is \code{TRUE}. The default is with replacement (\code{replace=TRUE}). See \code{\link[phyloseq:rarefy_even_depth]{phyloseq::rarefy_even_depth}} @@ -77,14 +81,14 @@ To maintain the reproducibility, please define the seed using set.seed() before implement this function. } \examples{ -# When samples in TreeSE are less than specified min_size, they will be removed. +# When samples in TreeSE are less than specified sample, they will be removed. # If after subsampling features are not present in any of the samples, # they will be removed. data(GlobalPatterns) tse <- GlobalPatterns set.seed(123) tse.subsampled <- rarefyAssay(tse, - min_size = 60000, + sample = 60000, name = "subsampled" ) tse.subsampled diff --git a/man/runDPCoA.Rd b/man/runDPCoA.Rd index 281edb553..714f2db81 100644 --- a/man/runDPCoA.Rd +++ b/man/runDPCoA.Rd @@ -16,6 +16,7 @@ getDPCoA(x, y, ...) y, ncomponents = 2, ntop = NULL, + subset.row = subset_row, subset_row = NULL, scale = FALSE, transposed = FALSE, @@ -28,6 +29,7 @@ getDPCoA(x, y, ...) assay.type = assay_name, assay_name = exprs_values, exprs_values = "counts", + tree.name = tree_name, tree_name = "phylo" ) @@ -58,10 +60,12 @@ to obtain.} variances to use for dimensionality reduction. Alternatively \code{NULL}, if all features should be used. (default: \code{ntop = NULL})} -\item{subset_row}{Vector specifying the subset of features to use for +\item{subset.row}{Vector specifying the subset of features to use for dimensionality reduction. 
This can be a character vector of row names, an integer vector of row indices or a logical vector.} +\item{subset_row}{Deprecated. Use \code{subset.row} instead.} + \item{scale}{Logical scalar, should the expression values be standardized?} \item{transposed}{Logical scalar, is x transposed with cells in rows?} @@ -78,9 +82,11 @@ will be disabled.)} assay to use for calculation. (Please use \code{assay.type} instead.)} -\item{tree_name}{a single \code{character} value for specifying which +\item{tree.name}{a single \code{character} value for specifying which rowTree will be used in calculation. -(By default: \code{tree_name = "phylo"})} +(By default: \code{tree.name = "phylo"})} + +\item{tree_name}{Deprecated. Use \code{tree.name} instead.} \item{altexp}{String or integer scalar specifying an alternative experiment containing the input data.} diff --git a/man/runNMDS.Rd b/man/runNMDS.Rd index e9fe59ec9..769fa9033 100644 --- a/man/runNMDS.Rd +++ b/man/runNMDS.Rd @@ -15,12 +15,15 @@ getNMDS(x, ...) \S4method{getNMDS}{ANY}( x, FUN = vegdist, + nmds.fun = nmdsFUN, nmdsFUN = c("isoMDS", "monoMDS"), ncomponents = 2, ntop = 500, + subset.row = subset_row, subset_row = NULL, scale = FALSE, transposed = FALSE, + keep.dist = keep_dist, keep_dist = FALSE, ... ) @@ -41,6 +44,7 @@ getNMDS(x, ...) assay_name = exprs_values, exprs_values = "counts", dimred = NULL, + ndimred = n_dimred, n_dimred = NULL, FUN = vegdist ) @@ -59,34 +63,40 @@ Alternatively, a \code{TreeSummarizedExperiment} containing such a matrix. 
For \code{addNMDS} a \linkS4class{SingleCellExperiment}} \item{...}{additional arguments to pass to \code{FUN} and -\code{nmdsFUN}.} +\code{nmds.fun}.} \item{FUN}{a \code{function} or \code{character} value with a function name returning a \code{\link[stats:dist]{dist}} object} -\item{nmdsFUN}{a \code{character} value to choose the scaling +\item{nmds.fun}{a \code{character} value to choose the scaling implementation, either \dQuote{isoMDS} for \code{\link[MASS:isoMDS]{MASS::isoMDS}} or \dQuote{monoMDS} for \code{\link[vegan:monoMDS]{vegan::monoMDS}}} +\item{nmdsFUN}{Deprecated. Use \code{nmds.fun} instead.} + \item{ncomponents}{Numeric scalar indicating the number of NMDS dimensions to obtain.} \item{ntop}{Numeric scalar specifying the number of features with the highest variances to use for dimensionality reduction.} -\item{subset_row}{Vector specifying the subset of features to use for +\item{subset.row}{Vector specifying the subset of features to use for dimensionality reduction. This can be a character vector of row names, an integer vector of row indices or a logical vector.} +\item{subset_row}{Deprecated. Use \code{subset.row} instead.} + \item{scale}{Logical scalar, should the expression values be standardized?} \item{transposed}{Logical scalar, is x transposed with cells in rows?} -\item{keep_dist}{Logical scalar indicating whether the \code{dist} object +\item{keep.dist}{Logical scalar indicating whether the \code{dist} object calculated by \code{FUN} should be stored as \sQuote{dist} attribute of the matrix returned/stored by \code{getNMDS}/ \code{addNMDS}.} +\item{keep_dist}{Deprecated. Use \code{keep.dist} instead.} + \item{assay.type}{a single \code{character} value for specifying which assay to use for calculation.} @@ -102,9 +112,11 @@ assay to use for calculation. 
\item{dimred}{String or integer scalar specifying the existing dimensionality reduction results to use.} -\item{n_dimred}{Integer scalar or vector specifying the dimensions to use if +\item{ndimred}{Integer scalar or vector specifying the dimensions to use if dimred is specified.} +\item{n_dimred}{Deprecated. Use \code{ndimred} instead.} + \item{altexp}{String or integer scalar specifying an alternative experiment containing the input data.} @@ -123,7 +135,7 @@ data in a \code{SingleCellExperiment} object. Either \code{\link[MASS:isoMDS]{MASS::isoMDS}} or \code{\link[vegan:monoMDS]{vegan::monoMDS}} are used internally to compute the NMDS components. If you supply a custom \code{FUN}, make sure that -the arguments of \code{FUN} and \code{nmdsFUN} do not collide. +the arguments of \code{FUN} and \code{nmds.fun} do not collide. } \examples{ # generate some example data diff --git a/man/splitOn.Rd b/man/splitOn.Rd index bb24ac165..b524cfd47 100644 --- a/man/splitOn.Rd +++ b/man/splitOn.Rd @@ -17,15 +17,22 @@ splitOn(x, ...) \S4method{splitOn}{SingleCellExperiment}(x, f = NULL, ...) -\S4method{splitOn}{TreeSummarizedExperiment}(x, f = NULL, update_rowTree = FALSE, ...) +\S4method{splitOn}{TreeSummarizedExperiment}(x, f = NULL, update.tree = update_rowTree, update_rowTree = FALSE, ...) unsplitOn(x, ...) -\S4method{unsplitOn}{list}(x, update_rowTree = FALSE, ...) +\S4method{unsplitOn}{list}(x, update.tree = update_rowTree, update_rowTree = FALSE, ...) -\S4method{unsplitOn}{SimpleList}(x, update_rowTree = FALSE, ...) +\S4method{unsplitOn}{SimpleList}(x, update.tree = update_rowTree, update_rowTree = FALSE, ...) -\S4method{unsplitOn}{SingleCellExperiment}(x, altExpNames = names(altExps(x)), keep_reducedDims = FALSE, ...) +\S4method{unsplitOn}{SingleCellExperiment}( + x, + altexp = altExpNames, + altExpNames = names(altExps(x)), + keep.dimred = keep_reducedDims, + keep_reducedDims = FALSE, + ... 
+) } \arguments{ \item{x}{A @@ -39,7 +46,7 @@ objects.} See \code{\link[=agglomerate-methods]{agglomerateByVariable}} for more details. \itemize{ -\item{\code{use_names} A single boolean value to select whether to name elements of +\item{\code{use.names} A single boolean value to select whether to name elements of list by their group names.} }} @@ -50,18 +57,24 @@ dimensions, \code{MARGIN} must be specified. Split by cols is not encouraged, since this is not compatible with storing the results in \code{altExps}.} -\item{update_rowTree}{\code{TRUE} or \code{FALSE}: Should the rowTree be updated +\item{update.tree}{\code{TRUE} or \code{FALSE}: Should the rowTree be updated based on splitted data? Option is enabled when \code{x} is a \code{TreeSummarizedExperiment} object or a list of such objects. -(By default: \code{update_rowTree = FALSE})} +(By default: \code{update.tree = FALSE})} -\item{altExpNames}{a \code{character} vector specifying the alternative experiments -to be unsplit. (By default: \code{altExpNames = names(altExps(x))})} +\item{update_rowTree}{Deprecated. Use \code{update.tree} instead.} -\item{keep_reducedDims}{\code{TRUE} or \code{FALSE}: Should the +\item{altexp}{a \code{character} vector specifying the alternative experiments +to be unsplit. (By default: \code{altexp = names(altExps(x))})} + +\item{altExpNames}{Deprecated. Use \code{altexp} instead.} + +\item{keep.dimred}{\code{TRUE} or \code{FALSE}: Should the \code{reducedDims(x)} be transferred to the result? Please note, that this breaks the link between the data used to calculate the reduced dims. -(By default: \code{keep_reducedDims = FALSE})} +(By default: \code{keep.dimred = FALSE})} + +\item{keep_reducedDims}{Deprecated. Use \code{keep.dimred} instead.} } \value{ For \code{splitOn}: \code{SummarizedExperiment} objects in a \code{SimpleList}. @@ -97,7 +110,7 @@ colData(tse)$group <- sample(1:3, ncol(tse), replace = TRUE) # Each element is named based on their group name.
If you don't want to name # elements, use use_name = FALSE. Since "group" can be found from rowdata and colData # you must use MARGIN. -se_list <- splitOn(tse, f = "group", use_names = FALSE, MARGIN = 1) +se_list <- splitOn(tse, f = "group", use.names = FALSE, MARGIN = 1) # When column names are shared between elements, you can store the list to altExps altExps(tse) <- se_list @@ -105,7 +118,7 @@ altExps(tse) <- se_list altExps(tse) # If you want to split on columns and update rowTree, you can do -se_list <- splitOn(tse, f = colData(tse)$group, update_rowTree = TRUE) +se_list <- splitOn(tse, f = colData(tse)$group, update.tree = TRUE) # If you want to combine groups back together, you can use unsplitBy unsplitOn(se_list) diff --git a/man/taxonomy-methods.Rd b/man/taxonomy-methods.Rd index 3c92e9bf7..d88aa496e 100644 --- a/man/taxonomy-methods.Rd +++ b/man/taxonomy-methods.Rd @@ -54,15 +54,25 @@ getTaxonomyLabels(x, ...) \S4method{getTaxonomyLabels}{SummarizedExperiment}( x, empty.fields = c(NA, "", " ", "\\t", "-", "_"), + with.rank = with_rank, with_rank = FALSE, + make.unique = make_unique, make_unique = TRUE, + resolve.loops = resolve_loops, resolve_loops = FALSE, ... ) mapTaxonomy(x, ...) -\S4method{mapTaxonomy}{SummarizedExperiment}(x, taxa = NULL, from = NULL, to = NULL, use_grepl = FALSE) +\S4method{mapTaxonomy}{SummarizedExperiment}( + x, + taxa = NULL, + from = NULL, + to = NULL, + use.grepl = use_grepl, + use_grepl = FALSE +) IdTaxaToDataFrame(from) } @@ -82,16 +92,22 @@ removed if \code{na.rm = TRUE} before agglomeration} \item{ranks}{Avector of ranks to be set} -\item{with_rank}{\code{TRUE} or \code{FALSE}: Should the level be add as a +\item{with.rank}{\code{TRUE} or \code{FALSE}: Should the level be add as a suffix? For example: "Phylum:Crenarchaeota" (default: -\code{with_rank = FALSE})} +\code{with.rank = FALSE})} + +\item{with_rank}{Deprecated. 
Use \code{with.rank} instead.} -\item{make_unique}{\code{TRUE} or \code{FALSE}: Should the labels be made -unique, if there are any duplicates? (default: \code{make_unique = TRUE})} +\item{make.unique}{\code{TRUE} or \code{FALSE}: Should the labels be made +unique, if there are any duplicates? (default: \code{make.unique = TRUE})} -\item{resolve_loops}{\code{TRUE} or \code{FALSE}: Should \code{resolveLoops} +\item{make_unique}{Deprecated. Use \code{make.unique} instead.} + +\item{resolve.loops}{\code{TRUE} or \code{FALSE}: Should \code{resolveLoops} be applied to the taxonomic data? Please note that has only an effect, -if the data is unique. (default: \code{resolve_loops = TRUE})} +if the data is unique. (default: \code{resolve.loops = TRUE})} + +\item{resolve_loops}{Deprecated. Use \code{resolve.loops} instead.} \item{taxa}{a \code{character} vector, which is used for subsetting the taxonomic information. If no information is found,\code{NULL} is returned @@ -107,9 +123,11 @@ must be a valid taxonomic rank. (default: \code{NULL}) \item{to}{a scalar \code{character} value, which must be a valid taxonomic rank. (default: \code{NULL})} -\item{use_grepl}{\code{TRUE} or \code{FALSE}: should pattern matching via +\item{use.grepl}{\code{TRUE} or \code{FALSE}: should pattern matching via \code{grepl} be used? Otherwise literal matching is used. (default: \code{FALSE})} + +\item{use_grepl}{Deprecated. Use \code{use.grepl} instead.} } \value{ \itemize{ diff --git a/man/transformAssay.Rd b/man/transformAssay.Rd index 66ab9f0ba..d91212c8d 100644 --- a/man/transformAssay.Rd +++ b/man/transformAssay.Rd @@ -62,9 +62,10 @@ Alternatively, a user-specified numeric value can be added as pseudocount.} \item{...}{additional arguments passed on to \code{vegan:decostand}: \itemize{ -\item \code{ref_vals}: A single value which will be used to fill +\item \code{reference}: A single value which will be used to fill reference sample's column in returned assay when calculating alr. 
-(default: \code{ref_vals = NA}) +(default: \code{reference = NA}) +\item \code{ref_vals} Deprecated. Use \code{reference} instead. }} } \value{ diff --git a/tests/testthat/test-0diversity.R b/tests/testthat/test-0diversity.R index f04732807..091b5452b 100644 --- a/tests/testthat/test-0diversity.R +++ b/tests/testthat/test-0diversity.R @@ -41,20 +41,20 @@ test_that("diversity estimates", { expect_equal(unname(round(cd$fisher, 4)), c(8.8037, 10.0989, 13.2783)) expect_equal(unname(round(cd$log_modulo_skewness, 6)), c(2.013610, 1.827198, 2.013695)) - # Tests that 'quantile' and 'num_of_classes' are working + # Tests that 'quantile' and 'nclasses' are working expect_equal(unname(round(colData(estimateDiversity(tse,index="log_modulo_skewness", quantile=0.855, - num_of_classes=32) + nclasses=32) )$log_modulo_skewness, 6)), c(1.814770, 1.756495, 1.842704)) # Tests that .calc_skewness returns right value mat <- assay(tse, "counts") - num_of_classes <- 61 + nclasses <- 61 quantile <- 0.35 quantile_point <- quantile(max(mat), quantile) - cutpoints <- c(seq(0, quantile_point, length=num_of_classes), Inf) + cutpoints <- c(seq(0, quantile_point, length=nclasses), Inf) freq_table <- table(cut(mat, cutpoints), col(mat)) test1 <- mia:::.calc_skewness(freq_table) @@ -115,24 +115,24 @@ test_that("diversity estimates", { expect_equal(colnames(colData(se_tree)), c(colnames(colData(se)), "faith")) # Expect error - expect_error(estimateDiversity(tse, index = "faith", tree_name = "test")) - expect_warning(estimateDiversity(tse, index = c("shannon", "faith"), tree_name = "test")) + expect_error(estimateDiversity(tse, index = "faith", tree.name = "test")) + expect_warning(estimateDiversity(tse, index = c("shannon", "faith"), tree.name = "test")) data(GlobalPatterns, package="mia") data(esophagus, package="mia") tse <- mergeSEs(GlobalPatterns, esophagus, join = "full", assay.type = "counts") expect_warning(estimateDiversity(tse, index = c("shannon", "faith"), - tree_name = "phylo.1", 
assay.type="counts")) + tree.name = "phylo.1", assay.type="counts")) expect_warning(estimateDiversity(tse, index = c("shannon", "faith"))) expect_error(estimateDiversity(tse, index = c("faith"), - tree_name = "test")) + tree.name = "test")) expect_error(estimateDiversity(tse, index = c("shannon", "faith"), - tree_name = TRUE)) + tree.name = TRUE)) expect_error(estimateDiversity(tse, index = c("shannon", "faith"), - tree_name = 1)) + tree.name = 1)) expect_error(estimateDiversity(tse, index = c("shannon", "faith"), - tree_name = c("phylo", "phylo.1"))) + tree.name = c("phylo", "phylo.1"))) # Test Faith with picante packages results (version 1.8.2) picante_res <- c( diff --git a/tests/testthat/test-0utilites.R b/tests/testthat/test-0utilites.R index 006b1cf9f..d33506f64 100644 --- a/tests/testthat/test-0utilites.R +++ b/tests/testthat/test-0utilites.R @@ -2,11 +2,11 @@ context("meltSE") test_that("meltSE", { # .norm_add_row_data .norm_add_col_data expect_error(mia:::.norm_add_row_data(), - 'argument "add_row_data" is missing') + 'argument "add.row" is missing') expect_error(mia:::.norm_add_row_data(TRUE), 'argument "x" is missing') expect_error(mia:::.norm_add_col_data(), - 'argument "add_col_data" is missing') + 'argument "add.col" is missing') expect_error(mia:::.norm_add_col_data(TRUE), 'argument "x" is missing') data(GlobalPatterns, package="mia") @@ -27,15 +27,15 @@ test_that("meltSE", { expect_warning(mia:::.norm_add_col_data(TRUE, x2, "SampleID"), "'x' contains a column") expect_error(mia:::.norm_add_row_data(NA, x, "FeatureID"), - "'add_row_data' contains NA") + "'add.row' contains NA") expect_error(mia:::.norm_add_col_data(NA, x, "SampleID"), - "'add_col_data' contains NA") + "'add.col' contains NA") # se <- GlobalPatterns molten_assay <- meltSE(se, - add_row_data = TRUE, - add_col_data = c("X.SampleID", "Primer"), + add.row = TRUE, + add.col = c("X.SampleID", "Primer"), assay.type = "counts") expect_s3_class(molten_assay, c("tbl_df","tbl","data.frame")) 
expect_equal(colnames(molten_assay)[c(1:4,11)], c("FeatureID","SampleID","counts","Kingdom","X.SampleID")) @@ -47,7 +47,7 @@ test_that("meltSE", { assay_taxa <- mia:::.add_row_data_to_molten_assay(only_assay, se, - add_row_data = taxonomyRanks(se), + add.row = taxonomyRanks(se), "FeatureID") expect_equal(colnames(assay_taxa)[1:4], c("FeatureID","SampleID","counts","Kingdom")) @@ -55,14 +55,14 @@ test_that("meltSE", { assay_taxa_coldata <- mia:::.add_col_data_to_molten_assay(assay_taxa, se, - add_col_data=c("X.SampleID", "Primer"), + add.col=c("X.SampleID", "Primer"), "SampleID") expect_equal(colnames(molten_assay)[c(1:4,11)], c("FeatureID","SampleID","counts","Kingdom","X.SampleID")) expect_equal(is.numeric(assay_taxa_coldata$counts), TRUE) # - actual <- meltSE(x, add_row_data = TRUE, add_col_data = TRUE) - expect_warning(actual2 <- meltSE(x2, add_row_data = TRUE, add_col_data = TRUE)) + actual <- meltSE(x, add.row = TRUE, add.col = TRUE) + expect_warning(actual2 <- meltSE(x2, add.row = TRUE, add.col = TRUE)) expect_false("FeatureID_row" %in% colnames(actual)) expect_true("FeatureID_row" %in% colnames(actual2)) expect_false("SampleID_col" %in% colnames(actual)) @@ -70,15 +70,15 @@ test_that("meltSE", { x3 <- x2 rownames(x3) <- NULL colnames(x3) <- NULL - actual3 <- meltSE(x3, add_row_data = TRUE, add_col_data = TRUE) + actual3 <- meltSE(x3, add.row = TRUE, add.col = TRUE) expect_false("FeatureID_row" %in% colnames(actual)) expect_false("SampleID_col" %in% colnames(actual)) # x4 <- se # Change names to 1, 2, 3... 
format colnames(x4) <- seq_along(colnames(x4)) - melted <- meltSE(x4, assay.type = "counts", add_col_data = TRUE) - melted2 <- meltSE(x4, assay.type = "counts", add_col_data = TRUE, + melted <- meltSE(x4, assay.type = "counts", add.col = TRUE) + melted2 <- meltSE(x4, assay.type = "counts", add.col = TRUE, check_names = TRUE) # There should not be any NAs expect_true(any(!(is.na(melted)))) diff --git a/tests/testthat/test-2merge.R b/tests/testthat/test-2merge.R index 1f1ad9fc9..cbb539218 100644 --- a/tests/testthat/test-2merge.R +++ b/tests/testthat/test-2merge.R @@ -78,7 +78,7 @@ test_that("merge", { rowRanges = unname(grl)) FUN_check_x <- function(x,archetype=1){ actual <- agglomerateByVariable(x, MARGIN = "rows", f, archetype, - mergeTree = FALSE) + update.tree = FALSE) expect_s4_class(actual,class(x)) expect_equal(dim(actual),c(2,10)) } @@ -101,16 +101,16 @@ test_that("merge", { tse <- tse[c(rownames(esophagus), rownames(GlobalPatterns)), ] # Only esophagus has these groups --> the merge should contain only esophagus merged <- agglomerateByVariable(tse, MARGIN = "rows", - f = rowData(tse)$group2, mergeTree=TRUE) + f = rowData(tse)$group2, update.tree=TRUE) merged2 <- agglomerateByVariable(tse, MARGIN = "rows", - f = rowData(tse)$group2, mergeTree = FALSE) + f = rowData(tse)$group2, update.tree = FALSE) merged3 <- agglomerateByVariable(esophagus, MARGIN = "rows", f = rowData(esophagus)$group2, - mergeTree = TRUE) + update.tree = TRUE) merged4 <- .merge_features(tse, merge.by = rowData(tse)$group2, - mergeTree = TRUE) + update.tree = TRUE) merged5 <- agglomerateByVariable(tse, MARGIN = "rows", - f = rowData(tse)$group2, mergeTree = TRUE) + f = rowData(tse)$group2, update.tree = TRUE) expect_equal( rowLinks(merged)$whichTree, rowLinks(merged2)$whichTree ) expect_false( all(rowLinks(merged) == rowLinks(merged2)) ) @@ -124,9 +124,9 @@ test_that("merge", { # Both datasets have group variable merged <- agglomerateByVariable(tse, MARGIN = "rows", - f = 
rowData(tse)$group, mergeTree = TRUE) + f = rowData(tse)$group, update.tree = TRUE) merged2 <- agglomerateByVariable(tse, MARGIN = "rows", - f = rowData(tse)$group, mergeTree = FALSE) + f = rowData(tse)$group, update.tree = FALSE) expect_equal( rowLinks(merged)$whichTree, rowLinks(merged2)$whichTree ) expect_false( all(rowLinks(merged) == rowLinks(merged2)) ) diff --git a/tests/testthat/test-2mergeSEs.R b/tests/testthat/test-2mergeSEs.R index 5f1ec915d..de147e3cc 100644 --- a/tests/testthat/test-2mergeSEs.R +++ b/tests/testthat/test-2mergeSEs.R @@ -14,14 +14,14 @@ test_that("mergeSEs", { expect_error( mergeSEs(tse1, tse2, join = 1) ) expect_error( mergeSEs(tse1, tse2, join = TRUE) ) expect_error( mergeSEs(tse1, tse2, join = NA) ) - expect_error( mergeSEs(tse1, tse2, collapse_samples = NA) ) - expect_error( mergeSEs(tse1, tse2, collapse_samples = 1) ) - expect_error( mergeSEs(tse1, tse2, collapse_samples = "test") ) - expect_error( mergeSEs(tse1, tse2, collapse_samples = NULL) ) + expect_error( mergeSEs(tse1, tse2, collapse.cols = NA) ) + expect_error( mergeSEs(tse1, tse2, collapse.cols = 1) ) + expect_error( mergeSEs(tse1, tse2, collapse.cols = "test") ) + expect_error( mergeSEs(tse1, tse2, collapse.cols = NULL) ) expect_error( mergeSEs(list(tse1, tse2, tse), join = "left") ) expect_error( mergeSEs(list(tse1, tse2, tse), join = "right") ) - expect_error( mergeSEs(tse1, tse2, missing_values = TRUE ) ) - expect_error( mergeSEs(tse1, tse2, missing_values = 36846 ) ) + expect_error( mergeSEs(tse1, tse2, missing.values = TRUE ) ) + expect_error( mergeSEs(tse1, tse2, missing.values = 36846 ) ) expect_error( mergeSEs(tse1, tse2, assay.type = "test") ) # Calculate relative transform to test assay.type tse1 <- transformAssay(tse1, method = "relabundance") @@ -54,26 +54,26 @@ test_that("mergeSEs", { # Expect that rowTree is preserved if rownames match tse <- mergeSEs(list(tse1, GlobalPatterns), assay.type = "counts", - missing_values = NA) + missing.values = NA) 
expect_equal(rowTree(GlobalPatterns), rowTree(tse)) # Expect some NAs tse <- mergeSEs(list(tse1, tse2), assay.type = "counts") expect_true( any(is.na(assay(tse))) ) # Test that dimensions match - tse <- mergeSEs(tse1, tse2, missing_values = 0) + tse <- mergeSEs(tse1, tse2, missing.values = 0) expect_equal( dim(tse), dim(tse1)+dim(tse2) ) # Expect no NAs in assay expect_true( all(!is.na(assay(tse))) ) # Check that rows are merged correctly when all the rowData is used to # specify rows - test <- mergeSEs(tse1, tse2, missing_values = 0, only.taxonomy = FALSE) + test <- mergeSEs(tse1, tse2, missing.values = 0, only.taxonomy = FALSE) expect_equal(tse, test) # Test that dimensions match tse <- suppressWarnings( - mergeSEs(list(tse1, tse2, tse3), missing_values = "MISSING") + mergeSEs(list(tse1, tse2, tse3), missing.values = "MISSING") ) expect_equal( dim(tse), dim(tse1)+dim(tse2)+dim(tse3) ) # Expect some "MISSING"s @@ -92,7 +92,7 @@ test_that("mergeSEs", { # CHECK FULL JOIN ################################################### tse <- suppressWarnings( mergeSEs(list(tse2, tse3, tse1, tse1[1:2, ], tse1[1, ]), - missing_values = NA) + missing.values = NA) ) # Get assay (as.matrix to remove links) assay <- as.matrix( assay(tse, "counts") ) @@ -153,7 +153,7 @@ test_that("mergeSEs", { expect_true( nrow(tse) == 0 ) expect_equal( rowTree(tse), NULL ) tse <- mergeSEs(list(tse1[, 1:5], tse1[, 5:10], tse1[1:20, 6:10]), - join = "inner", collapse_samples = TRUE) + join = "inner", collapse.cols = TRUE) expect_true( all(dim(tse) == c(20, 10)) ) expect_equal( rowTree(tse), rowTree(tse1) ) # Get assay (as.matrix to remove links) @@ -185,7 +185,7 @@ test_that("mergeSEs", { # CHECK LEFT JOIN ############################################## tse <- mergeSEs(list(tse1[11:20, 1:13], tse1[10:50, 7:20]), - join = "left", collapse_samples = TRUE) + join = "left", collapse.cols = TRUE) expect_true( all(dim(tse) == c(10, 20)) ) expect_equal( rowTree(tse), rowTree(tse1) ) # Get assay (as.matrix to 
remove links) @@ -217,8 +217,8 @@ test_that("mergeSEs", { # CHECK RIGHT JOIN ############################################## tse <- mergeSEs(list(tse1[10:50, 1:13], tse1[1:10, 7:20]), - join = "right", missing_values = NA, - collapse_samples = TRUE) + join = "right", missing.values = NA, + collapse.cols = TRUE) expect_true( all(dim(tse) == c(10, 20)) ) expect_equal( rowTree(tse), rowTree(tse1) ) # Get assay (as.matrix to remove links) @@ -303,7 +303,7 @@ test_that("mergeSEs", { join = "left") expect_true(class(tse) == "TreeSummarizedExperiment") - # Test collapse_samples + # Test collapse.cols tse_test <- mergeSEs(x = tse[1:28, 1:3], y = tse[23, 1:5], join = "full") diff --git a/tests/testthat/test-2taxonomy.R b/tests/testthat/test-2taxonomy.R index ea06deab9..68e9b0de7 100644 --- a/tests/testthat/test-2taxonomy.R +++ b/tests/testthat/test-2taxonomy.R @@ -62,10 +62,10 @@ test_that("taxonomy", { expect_equal(getTaxonomyLabels(xtse), c("Family:j","Phylum:a","Family:k","Family:l","Family:m", "Family:n","Family:o","Phylum:c","Family:o_1")) - expect_equal(getTaxonomyLabels(xtse, make_unique = FALSE), + expect_equal(getTaxonomyLabels(xtse, make.unique = FALSE), c("Family:j","Phylum:a","Family:k","Family:l","Family:m", "Family:n","Family:o","Phylum:c","Family:o")) - expect_equal(getTaxonomyLabels(xtse, resolve_loops = TRUE), + expect_equal(getTaxonomyLabels(xtse, resolve.loops = TRUE), c("Family:j","Phylum:a","Family:k","Family:l","Family:m", "Family:n","Family:o_1","Phylum:c","Family:o_2")) @@ -85,10 +85,10 @@ test_that("taxonomy", { actual <- mia:::.get_taxa_any_match("Escherichia", td) expect_type(actual,"logical") expect_length(actual,nrow(td)) - actual <- mia:::.get_taxa_row_match("Escherichia", td, "Genus", use_grepl = TRUE) + actual <- mia:::.get_taxa_row_match("Escherichia", td, "Genus", use.grepl = TRUE) expect_type(actual,"logical") expect_length(actual,nrow(td)) - actual <- mia:::.get_taxa_any_match("Escherichia", td, use_grepl = TRUE) + actual <- 
mia:::.get_taxa_any_match("Escherichia", td, use.grepl = TRUE) expect_type(actual,"logical") expect_length(actual,nrow(td)) actual <- mia:::.get_taxa_row_match(NA_character_, td, "Genus") diff --git a/tests/testthat/test-3agglomerate.R b/tests/testthat/test-3agglomerate.R index 01913e75f..025e6e8d6 100644 --- a/tests/testthat/test-3agglomerate.R +++ b/tests/testthat/test-3agglomerate.R @@ -34,8 +34,8 @@ test_that("agglomerate", { expect_error(agglomerateByRank(xtse,"Family",na.rm=""), "'na.rm' must be TRUE or FALSE") expect_error( - agglomerateByRank(xtse,"Family",na.rm=FALSE,agglomerate.tree=""), - "'agglomerate.tree' must be TRUE or FALSE") + agglomerateByRank(xtse,"Family",na.rm=FALSE,update.tree=""), + "'update.tree' must be TRUE or FALSE") xtse2 <- xtse rowData(xtse2) <- NULL expect_error(agglomerateByRank(xtse2,"Family",na.rm=FALSE), @@ -46,10 +46,10 @@ test_that("agglomerate", { actual <- agglomerateByRank(xtse,"Phylum",na.rm=FALSE) expect_equivalent(rowData(actual),rowData(actual_phylum)) # - actual <- agglomerateByRank(xtse,"Family", onRankOnly = FALSE, na.rm = TRUE) + actual <- agglomerateByRank(xtse,"Family", ignore.taxonomy = FALSE, na.rm = TRUE) expect_equal(dim(actual),c(6,10)) expect_equal(rowData(actual)$Family,c("c","d","e","f","g","h")) - actual <- agglomerateByRank(xtse,"Family", onRankOnly = FALSE, na.rm = FALSE) # the default + actual <- agglomerateByRank(xtse,"Family", ignore.taxonomy = FALSE, na.rm = FALSE) # the default expect_equal(dim(actual),c(8,10)) expect_equal(rowData(actual)$Family,c("c","d","e","f","g","h",NA,NA)) actual <- agglomerateByRank(xtse,"Phylum") @@ -64,23 +64,23 @@ test_that("agglomerate", { # the same dimensionality is retained data(enterotype, package="mia") expect_equal(length(unique(rowData(enterotype)[,"Genus"])), - nrow(agglomerateByRank(enterotype,"Genus", onRankOnly = FALSE, + nrow(agglomerateByRank(enterotype,"Genus", ignore.taxonomy = FALSE, na.rm = FALSE))) # agglomeration in all its forms data(GlobalPatterns, 
package="mia") se <- GlobalPatterns actual <- agglomerateByRank(se, rank = "Family", - onRankOnly = FALSE, na.rm = FALSE) + ignore.taxonomy = FALSE, na.rm = FALSE) expect_equal(dim(actual),c(603,26)) expect_equal(length(rowTree(actual)$tip.label), length(rowTree(se)$tip.label)) actual <- agglomerateByRank(se, rank = "Family", - onRankOnly = FALSE, na.rm = FALSE, agglomerate.tree = TRUE) + ignore.taxonomy = FALSE, na.rm = FALSE, update.tree = TRUE) expect_equal(dim(actual),c(603,26)) expect_equal(length(rowTree(actual)$tip.label), 603) actual <- agglomerateByRank(se, rank = "Family", - onRankOnly = FALSE, na.rm = FALSE, agglomerate.tree = TRUE) + ignore.taxonomy = FALSE, na.rm = FALSE, update.tree = TRUE) expect_equal(dim(actual),c(603,26)) expect_equal(length(rowTree(actual)$tip.label), nrow(actual)) # Test that warning occurs when assay contian binary or negative values @@ -121,33 +121,33 @@ test_that("agglomerate", { expect_equal(nrow(test0), 945) expect_equal(nrow(test1), 2307) - # Test that remove_empty_ranks work + # Test that empty.ranks.rm work expect_error( - agglomerateByRank(tse, rank = "Class", remove_empty_ranks = NULL)) + agglomerateByRank(tse, rank = "Class", empty.ranks.rm = NULL)) expect_error( - agglomerateByRank(tse, rank = "Class", remove_empty_ranks = "NULL")) + agglomerateByRank(tse, rank = "Class", empty.ranks.rm = "NULL")) expect_error( - agglomerateByRank(tse, rank = "Class", remove_empty_ranks = 1)) + agglomerateByRank(tse, rank = "Class", empty.ranks.rm = 1)) expect_error( agglomerateByRank( - tse, rank = "Class", remove_empty_ranks = c(TRUE, TRUE))) + tse, rank = "Class", empty.ranks.rm = c(TRUE, TRUE))) # Add a column to rowData(se) to test that only NA rank columns are removed - # when remove_empty_ranks = TRUE + # when empty.ranks.rm = TRUE rank <- "Class" rowData(tse)[["test"]] <- rep(NA, nrow(rowData(tse))) x <- agglomerateByRank(tse, rank = rank) rd1 <- rowData(x) - x <- agglomerateByRank(tse, rank = rank, remove_empty_ranks = TRUE) 
+ x <- agglomerateByRank(tse, rank = rank, empty.ranks.rm = TRUE) rd2 <- rowData(x) cols <- taxonomyRanks(tse)[ seq_len(which(taxonomyRanks(tse) == "Class")) ] cols <- c(cols, "test") expect_equal(rd1[, cols], rd2[, cols]) expect_true( ncol(rd1) > ncol(rd2) ) - # Test that make_unique work + # Test that make.unique work uniq <- agglomerateByRank(tse, rank = "Species", na.rm = FALSE) not_uniq <- agglomerateByRank( - tse, rank = "Species", make_unique = FALSE, na.rm = FALSE) + tse, rank = "Species", make.unique = FALSE, na.rm = FALSE) expect_true( !any( duplicated(rownames(uniq)) ) ) expect_true( any( duplicated(rownames(not_uniq)) ) ) @@ -157,7 +157,7 @@ test_that("agglomerate", { se <- SilvermanAGutData # checking reference consensus sequence generation - actual <- agglomerateByRank(se,"Genus", mergeRefSeq = FALSE) + actual <- agglomerateByRank(se,"Genus", update.refseq = FALSE) # There should be only one exact match for each sequence seqs_test <- as.character( referenceSeq(actual) ) seqs_ref <- as.character( referenceSeq(se) ) @@ -167,7 +167,7 @@ test_that("agglomerate", { # Merging creates consensus sequences. th <- runif(1, 0, 1) actual <- agglomerateByRank( - se, "Genus", mergeRefSeq = TRUE, threshold = th) + se, "Genus", update.refseq = TRUE, threshold = th) seqs_test <- referenceSeq(actual) # Get single taxon as reference. 
Merge those sequences and test that it # equals to one that is output of agglomerateByRank @@ -181,14 +181,14 @@ test_that("agglomerate", { expect_equal(seqs_test, seqs_ref) # checking reference consensus sequence generation using 'Genus:Alistipes' - actual <- agglomerateByRank(se,"Genus", mergeRefSeq = FALSE) + actual <- agglomerateByRank(se,"Genus", update.refseq = FALSE) expect_equal(as.character(referenceSeq(actual)[["Alistipes"]]), paste0("TCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTGATAA", "GTTAGAGGTGAAATCCCGGGGCTTAACTCCGGAACTGCCTCTAATACTGTTAG", "ACTAGAGAGTAGTTGCGGTAGGCGGAATGTATGGTGTAGCGGTGAAATGCTTA", "GAGATCATACAGAACACCGATTGCGAAGGCAGCTTACCAAACTATATCTGACG", "TTGAGGCACGAAAGCGTGGGG")) - actual <- agglomerateByRank(se,"Genus", mergeRefSeq = TRUE) + actual <- agglomerateByRank(se,"Genus", update.refseq = TRUE) expect_equal(as.character(referenceSeq(actual)[["Alistipes"]]), paste0("BCNMKCKTTVWYCKKMHTTMYTKKKYKTMMMKNKHDYKYMKDYKKNHNNNYMM", "KHHNDNNKTKMMMDNBHNBKKCTYMMCHNBNDDDNKSSHBNNRWDMYKKBNND", diff --git a/tests/testthat/test-4IO.R b/tests/testthat/test-4IO.R index 05c0b4248..3f1be3da0 100644 --- a/tests/testthat/test-4IO.R +++ b/tests/testthat/test-4IO.R @@ -17,9 +17,9 @@ test_that("Importing biom files yield SummarizedExperiment objects", { package="mia") ) tse <- makeTreeSEFromBiom(biom_object, - removeTaxaPrefixes = FALSE, - rankFromPrefix = FALSE, - remove.artifacts = TRUE, + prefix.rm = FALSE, + rank.from.prefix = FALSE, + artifact.rm = TRUE, pattern = "\"") # Testing no prefixes removed expect_true(rowData(tse) %>% @@ -29,15 +29,15 @@ test_that("Importing biom files yield SummarizedExperiment objects", { expect_false( sapply(tolower(colnames(rowData(tse))), function(x) x %in% TAXONOMY_RANKS) %>% all()) - # Testing the remove.artifacts, since the original artifact in the biom file + # Testing the artifact.rm, since the original artifact in the biom file # is '\"' expect_false(apply(rowData(tse), 2, grepl, pattern="^\"") %>% all()) # Testing prefixes 
removed tse <- makeTreeSEFromBiom(biom_object, - removeTaxaPrefixes=TRUE, - rankFromPrefix=TRUE, - remove.artifacts = TRUE, + prefix.rm=TRUE, + rank.from.prefix=TRUE, + artifact.rm = TRUE, pattern = "\"") expect_false(rowData(tse) %>% apply(2,grepl,pattern="sk__|([dkpcofgs]+)__") %>% @@ -45,45 +45,45 @@ test_that("Importing biom files yield SummarizedExperiment objects", { # Testing parsing taxonomy ranks from prefixes tse <- makeTreeSEFromBiom(biom_object, - removeTaxaPrefixes=FALSE, - rankFromPrefix=TRUE, - remove.artifacts = TRUE, + prefix.rm=FALSE, + rank.from.prefix=TRUE, + artifact.rm = TRUE, pattern = "\"") expect_true( sapply(tolower(colnames(rowData(tse))), function(x) x %in% TAXONOMY_RANKS) %>% all()) - # Testing the remove.artifacts, the original artifact in the biom file + # Testing the artifact.rm, the original artifact in the biom file # is '\"', as a test we rather try remove a non existing pattern. tse <- makeTreeSEFromBiom(biom_object, - removeTaxaPrefixes=FALSE, - rankFromPrefix=FALSE, - remove.artifacts = TRUE, + prefix.rm=FALSE, + rank.from.prefix=FALSE, + artifact.rm = TRUE, pattern = "\\*|\\?") # with wrong pattern artifact not cleaned expect_true(apply(rowData(tse), 2, grepl, pattern="\"") %>% any()) - # Testing the remove.artifacts, with the value 'auto' to automatically + # Testing the artifact.rm, with the value 'auto' to automatically # detect the artifact and remove it (in our case the artifact is '\"'). tse <- makeTreeSEFromBiom(biom_object, - removeTaxaPrefixes=FALSE, - rankFromPrefix=FALSE, - remove.artifacts = TRUE) + prefix.rm=FALSE, + rank.from.prefix=FALSE, + artifact.rm = TRUE) # Checking if 'auto' has detected and cleaned the artifact expect_false(apply(rowData(tse), 2, grepl, pattern="\"") %>% any()) - # Testing the remove.artifacts, with the value NULL to not detect or clean + # Testing the artifact.rm, with the value NULL to not detect or clean # anything. 
tse <- makeTreeSEFromBiom(biom_object, - removeTaxaPrefixes=FALSE, - rankFromPrefix=FALSE, - remove.artifacts = FALSE) + prefix.rm=FALSE, + rank.from.prefix=FALSE, + artifact.rm = FALSE) # Checking if the '\"' artifact still exists. expect_true(apply(rowData(tse), 2, grepl, pattern="\"") %>% any()) # General final test tse <- makeTreeSEFromBiom(biom_object, - removeTaxaPrefixes=TRUE, - rankFromPrefix=TRUE, - remove.artifacts = TRUE) + prefix.rm=TRUE, + rank.from.prefix=TRUE, + artifact.rm = TRUE) # check if '\"' cleaned expect_false(apply(rowData(tse), 2, grepl, pattern="\"") %>% any()) # check if taxa prefixes removed @@ -101,9 +101,9 @@ test_that("Importing biom files yield SummarizedExperiment objects", { package = "biomformat") ) tse <- makeTreeSEFromBiom(biom_object, - removeTaxaPrefixes=TRUE, - rankFromPrefix=TRUE, - remove.artifacts = TRUE) + prefix.rm=TRUE, + rank.from.prefix=TRUE, + artifact.rm = TRUE) # check if taxa prefixes removed expect_false(rowData(tse) %>% apply(2,grepl,pattern="sk__|([dkpcofgs]+)__") %>% @@ -159,14 +159,12 @@ test_that("Importing Mothur files yield SummarizedExperiment objects", { se2 <- importMothur(counts, taxa2) expect_s4_class(se, "SummarizedExperiment") expect_s4_class(se2, "SummarizedExperiment") - expect_error(importMothur(counts, meta)) - expect_error(importMothur(counts, meta)) - se <- importMothur(counts, designFile = meta) - se2 <- importMothur(counts, designFile = meta) + se <- importMothur(counts, col.file = meta) + se2 <- importMothur(counts, col.file = meta) expect_s4_class(se, "SummarizedExperiment") expect_s4_class(se2, "SummarizedExperiment") - se <- importMothur(counts, taxa, meta) - se2 <- importMothur(counts, taxa2, meta) + se <- importMothur(assay.file = counts, row.file = taxa, col.file = meta) + se2 <- importMothur(assay.file = counts, row.file = taxa2, col.file = meta) expect_s4_class(se, "SummarizedExperiment") expect_s4_class(se2, "SummarizedExperiment") @@ -218,15 +216,15 @@ test_that("Importing Mothur 
files yield SummarizedExperiment objects", { c("group", "sex", "age", "drug", "label", "numOtus", "Group")) }) -featureTableFile <- system.file("extdata", "table.qza", package = "mia") -taxonomyTableFile <- system.file("extdata", "taxonomy.qza", package = "mia") -sampleMetaFile <- system.file("extdata", "sample-metadata.tsv", package = "mia") -refSeqFile <- system.file("extdata", "refseq.qza", package = "mia") +assay.file <- system.file("extdata", "table.qza", package = "mia") +row.file <- system.file("extdata", "taxonomy.qza", package = "mia") +col.file <- system.file("extdata", "sample-metadata.tsv", package = "mia") +refseq.file <- system.file("extdata", "refseq.qza", package = "mia") test_that("make TSE worked properly while no sample or taxa data", { skip_if_not(require("biomformat", quietly = TRUE)) ## no sample data or taxa data - expect_silent(tse <- importQIIME2(featureTableFile)) + expect_silent(tse <- importQIIME2(assay.file)) expect_s4_class(tse, "TreeSummarizedExperiment") expect_equal(dim(tse), c(770,34)) }) @@ -235,21 +233,21 @@ test_that("reference sequences of TSE", { skip_if_not(require("biomformat", quietly = TRUE)) # 1. fasta file of refseq tse <- importQIIME2( - featureTableFile, - refSeqFile = refSeqFile + assay.file, + refseq.file = refseq.file ) tse2 <- importQIIME2( - featureTableFile, - refSeqFile = refSeqFile, + assay.file, + refseq.file = refseq.file, featureNamesAsRefseq = FALSE ) - expect_identical(tse@referenceSeq, importQZA(refSeqFile)) - expect_identical(tse2@referenceSeq, importQZA(refSeqFile)) + expect_identical(tse@referenceSeq, importQZA(refseq.file)) + expect_identical(tse2@referenceSeq, importQZA(refseq.file)) # 2. 
row.names of feature table as refseq # 2.1 element of row.names of feature table is not DNA sequence tse <- importQIIME2( - featureTableFile, + assay.file, featureNamesAsRefseq = TRUE ) expect_null(tse@referenceSeq) @@ -259,7 +257,7 @@ test_that("reference sequences of TSE", { # codes used for create sample data (donot run) if (FALSE) { .require_package("biomformat") - feature_tab <- importQZA(featureTableFile) + feature_tab <- importQZA(assay.file) n_feature <- nrow(feature_tab) random_seq <- sapply( rep(20, n_feature), @@ -287,7 +285,7 @@ test_that("reference sequences of TSE", { package = "mia" ) - # featureNamesAsRefseq is TRUE, refSeqFile is NULL, set row.names of + # featureNamesAsRefseq is TRUE, refseq.file is NULL, set row.names of # feature table as reference sequences tse <- importQIIME2( featureTableFile2, @@ -298,19 +296,19 @@ test_that("reference sequences of TSE", { names(names_seq) <- paste0("seq_", seq_along(names_seq)) expect_identical(tse@referenceSeq, names_seq) - # refSeqFile is not NULL, featureNamesAsRefseq is TRUE, - # set the sequences from refSeqFile as reference sequences + # refseq.file is not NULL, featureNamesAsRefseq is TRUE, + # set the sequences from refseq.file as reference sequences tse <- importQIIME2( featureTableFile2, featureNamesAsRefseq = TRUE, - refSeqFile = refSeqFile + refseq.file = refseq.file ) - expect_identical(tse@referenceSeq, importQZA(refSeqFile)) + expect_identical(tse@referenceSeq, importQZA(refseq.file)) - # 3. refSeqFile = NULL, featureNamesAsRefseq = FALSE + # 3. 
refseq.file = NULL, featureNamesAsRefseq = FALSE tse <- importQIIME2( - featureTableFile, - refSeqFile = NULL, + assay.file, + refseq.file = NULL, featureNamesAsRefseq = FALSE ) expect_null(tse@referenceSeq) @@ -347,12 +345,12 @@ test_that("`.parse_taxonomy` work with any combination of taxonomic ranks", { dimnames = list(c("a", "b"), c("Feature.ID", "Taxon", "Confidence")) ) expect_equal(mia:::.parse_taxonomy(test_taxa)[,"Species"],c("s__test",NA)) - expect_equal(mia:::.parse_taxonomy(test_taxa, removeTaxaPrefixes = TRUE)[,"Species"], + expect_equal(mia:::.parse_taxonomy(test_taxa, prefix.rm = TRUE)[,"Species"], c("test",NA)) }) test_that("`.read_q2sample_meta` remove the row contained `#q2:types`", { - expect_false(any(as(.read_q2sample_meta(sampleMetaFile), "matrix") == "#q2:types")) + expect_false(any(as(.read_q2sample_meta(col.file), "matrix") == "#q2:types")) }) test_that('get file extension', { @@ -362,24 +360,24 @@ test_that('get file extension', { test_that('read qza file', { expect_error(importQZA("abc"), "does not exist") - expect_error(importQZA(sampleMetaFile), "must be in `qza` format") + expect_error(importQZA(col.file), "must be in `qza` format") }) test_that("Confidence of taxa is numberic", { skip_if_not(require("biomformat", quietly = TRUE)) tse <- importQIIME2( - featureTableFile, - taxonomyTableFile = taxonomyTableFile + assay.file, + row.file = row.file ) expect_true(is.numeric(S4Vectors::mcols(tse)$Confidence)) }) test_that("dimnames of feature table is identicle with meta data", { skip_if_not(require("biomformat", quietly = TRUE)) - feature_tab <- importQZA(featureTableFile) + feature_tab <- importQZA(assay.file) - sample_meta <- .read_q2sample_meta(sampleMetaFile) - taxa_meta <- importQZA(taxonomyTableFile) + sample_meta <- .read_q2sample_meta(col.file) + taxa_meta <- importQZA(row.file) taxa_meta <- .subset_taxa_in_feature(taxa_meta, feature_tab) new_feature_tab <- .set_feature_tab_dimnames( feature_tab, @@ -445,7 +443,7 @@ 
test_that("makePhyloseqFromTreeSE", { # Test with agglomeration that that pruning is done internally test1 <- agglomerateByRank(tse, rank = "Phylum") - test2 <- agglomerateByRank(tse, rank = "Phylum", agglomerate.tree = TRUE) + test2 <- agglomerateByRank(tse, rank = "Phylum", update.tree = TRUE) test1_phy <- expect_warning(makePhyloseqFromTreeSE(test1)) test2_phy <- makePhyloseqFromTreeSE(test2) @@ -481,7 +479,7 @@ test_that("makePhyloseqFromTreeSE", { expect_identical(phyloseq::phy_tree(phy), rowTree(tse)) # Test that merging objects lead to correct phyloseq - tse <- mergeSEs(GlobalPatterns, esophagus, assay.type="counts", missing_values = 0) + tse <- mergeSEs(GlobalPatterns, esophagus, assay.type="counts", missing.values = 0) pseq <- makePhyloseqFromTreeSE(tse, assay.type="counts") tse_compare <- tse[ rownames(GlobalPatterns), ] diff --git a/tests/testthat/test-4splitOn.R b/tests/testthat/test-4splitOn.R index 4490b7880..5646afc54 100644 --- a/tests/testthat/test-4splitOn.R +++ b/tests/testthat/test-4splitOn.R @@ -18,15 +18,15 @@ test_that("splitOn", { expect_error(splitOn(x, rowData(x)$Phylum, MARGIN = 2)) expect_error(splitOn(x)) expect_error(splitOn(assay(x), x$SampleType)) - expect_error(splitOn(x, "SampleType", use_names = 1)) - expect_error(splitOn(x, "SampleType", use_names = "TRUE")) - expect_error(splitOn(x, "SampleType", update_rowTree = 1)) - expect_error(splitOn(x, "SampleType", update_rowTree = "TRUE")) + expect_error(splitOn(x, "SampleType", use.names = 1)) + expect_error(splitOn(x, "SampleType", use.names = "TRUE")) + expect_error(splitOn(x, "SampleType", update.tree = 1)) + expect_error(splitOn(x, "SampleType", update.tree = "TRUE")) # Test that names of elemetns are correct list <- splitOn(x, "SampleType") expect_equal(names(list), as.character(unique(x$SampleType)) ) - list <- splitOn(x, "SampleType", use_names = FALSE) + list <- splitOn(x, "SampleType", use.names = FALSE) expect_equal(names(list), NULL ) # Test that col-wie split is done 
correctly @@ -41,7 +41,7 @@ test_that("splitOn", { # Test that number of tips of updated rowTree equals number of rows for # each tse in the list returned - list <- splitOn(x, "SampleType", update_rowTree = TRUE) + list <- splitOn(x, "SampleType", update.tree = TRUE) for (k in length(list)){ expect_equal( length(rowTree(list[[k]], "phylo")$tip.label), nrow(list[[k]]) ) @@ -53,8 +53,8 @@ test_that("splitOn", { mod_list <- list mod_list[[1]] <- mod_list[[1]][1:2, 1:2] expect_error( unsplitOn(mod_list) ) - expect_error(unsplitOn(list, update_rowTree = 1)) - expect_error(unsplitOn(list, update_rowTree = "TRUE")) + expect_error(unsplitOn(list, update.tree = 1)) + expect_error(unsplitOn(list, update.tree = "TRUE")) # Test that works x_sub <- x[1:100, 1:10] diff --git a/tests/testthat/test-5Unifrac.R b/tests/testthat/test-5Unifrac.R index 023842946..294607541 100644 --- a/tests/testthat/test-5Unifrac.R +++ b/tests/testthat/test-5Unifrac.R @@ -5,31 +5,31 @@ test_that("Unifrac beta diversity", { tse <- transformAssay(tse, assay.type="counts", method="relabundance") expect_error( - calculateUnifrac(tse, assay.type = "test", tree_name = "phylo", + calculateUnifrac(tse, assay.type = "test", tree.name = "phylo", weighted = FALSE) ) expect_error( - calculateUnifrac(tse, assay.type = 2, tree_name = "phylo", + calculateUnifrac(tse, assay.type = 2, tree.name = "phylo", weighted = FALSE) ) expect_error( - calculateUnifrac(tse, assay.type = TRUE, tree_name = "phylo", + calculateUnifrac(tse, assay.type = TRUE, tree.name = "phylo", weighted = FALSE) ) expect_error( - calculateUnifrac(tse, assay.type = "counts", tree_name = "test", + calculateUnifrac(tse, assay.type = "counts", tree.name = "test", weighted = FALSE) ) expect_error( - calculateUnifrac(tse, assay.type = "counts", tree_name = 1, + calculateUnifrac(tse, assay.type = "counts", tree.name = 1, weighted = FALSE) ) expect_error( - calculateUnifrac(tse, assay.type = "counts", tree_name = TRUE, + calculateUnifrac(tse, assay.type = 
"counts", tree.name = TRUE, weighted = "FALSE") ) expect_error( - calculateUnifrac(tse, assay.type = "counts", tree_name = "phylo", + calculateUnifrac(tse, assay.type = "counts", tree.name = "phylo", weighted = 1) ) @@ -50,7 +50,7 @@ test_that("Unifrac beta diversity", { # Test with merged object with multiple trees. runUnifrac takes subset of # data based on provided tree. - tse <- mergeSEs(GlobalPatterns, esophagus, assay.type="counts", missing_values = 0) + tse <- mergeSEs(GlobalPatterns, esophagus, assay.type="counts", missing.values = 0) tse_ref <- tse tse_ref <- tse_ref[ rowLinks(tse_ref)[["whichTree"]] == "phylo", ] # Calculate unweighted unifrac diff --git a/tests/testthat/test-5dominantTaxa.R b/tests/testthat/test-5dominantTaxa.R index 1e1d87acd..fe3fa6aaf 100644 --- a/tests/testthat/test-5dominantTaxa.R +++ b/tests/testthat/test-5dominantTaxa.R @@ -33,7 +33,7 @@ test_that("getDominant", { names(exp.vals.two) <- exp.names.one expect_equal(getDominant(tse, rank = "Genus", - onRankOnly = FALSE, + ignore.taxonomy = FALSE, na.rm = FALSE)[1:15], exp.vals.two) diff --git a/tests/testthat/test-5getCrossAssociation.R b/tests/testthat/test-5getCrossAssociation.R index 7a0d9ac7e..1eb29e12e 100644 --- a/tests/testthat/test-5getCrossAssociation.R +++ b/tests/testthat/test-5getCrossAssociation.R @@ -30,11 +30,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "spearman", mode = "table", - p_adj_method = "fdr", - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.05, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 3, @@ -44,11 +44,11 @@ test_that("getCrossAssociation", { altexp1 = 1, altexp2 = NULL, method = "spearman", mode = "table", - p_adj_method = "fdr", - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.05, + cor.threshold = NULL, sort 
= FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 3, @@ -59,11 +59,11 @@ test_that("getCrossAssociation", { altexp2 = NULL, method = "spearman", mode = "table", - p_adj_method = "fdr", - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.05, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 3, @@ -74,11 +74,11 @@ test_that("getCrossAssociation", { altexp1 = NULL, method = "spearman", mode = "table", - p_adj_method = "fdr", - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.05, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -87,11 +87,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "spearman", mode = "table", - p_adj_method = "fdr", - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.05, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -100,11 +100,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "spearman", mode = "table", - p_adj_method = "fdr", - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.05, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -113,11 +113,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = 1, mode = "table", - p_adj_method = "fdr", - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = "fdr", + 
p.adj.threshold = 0.05, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -126,11 +126,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = FALSE, mode = "table", - p_adj_method = "fdr", - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.05, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -139,11 +139,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "pearson", mode = TRUE, - p_adj_method = "fdr", - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.05, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -152,11 +152,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "pearson", mode = "matrix", - p_adj_method = 1, - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = 1, + p.adj.threshold = 0.05, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -165,11 +165,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "pearson", mode = "matrix", - p_adj_method = "fdr", - p_adj_threshold = 2, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 2, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -178,11 +178,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "pearson", mode = "matrix", - p_adj_method = "fdr", - p_adj_threshold = 
TRUE, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = TRUE, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -191,11 +191,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "pearson", mode = "matrix", - p_adj_method = "fdr", - p_adj_threshold = 0.1, - cor_threshold = 2, + p.adj.method = "fdr", + p.adj.threshold = 0.1, + cor.threshold = 2, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -204,11 +204,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "pearson", mode = "matrix", - p_adj_method = "fdr", - p_adj_threshold = 0.1, - cor_threshold = TRUE, + p.adj.method = "fdr", + p.adj.threshold = 0.1, + cor.threshold = TRUE, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -217,11 +217,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "pearson", mode = "matrix", - p_adj_method = "fdr", - p_adj_threshold = 0.1, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.1, + cor.threshold = NULL, sort = 1, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -230,11 +230,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "pearson", mode = "matrix", - p_adj_method = "fdr", - p_adj_threshold = 0.1, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.1, + cor.threshold = NULL, sort = TRUE, - filter_self_correlations = 1, + filter.self.cor = 1, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, @@ -243,11 +243,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "pearson", mode = 
"matrix", - p_adj_method = "fdr", - p_adj_threshold = 0.1, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.1, + cor.threshold = NULL, sort = TRUE, - filter_self_correlations = TRUE, + filter.self.cor = TRUE, verbose = 1)) expect_error(getCrossAssociation(mae[[1]], assay(mae1[[2]]), @@ -257,11 +257,11 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "pearson", mode = "matrix", - p_adj_method = "fdr", - p_adj_threshold = 0.1, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.1, + cor.threshold = NULL, sort = TRUE, - filter_self_correlations = TRUE, + filter.self.cor = TRUE, verbose = 1)) expect_error(getCrossAssociation(mae[[1]], NULL, @@ -271,56 +271,56 @@ test_that("getCrossAssociation", { assay.type2 = "counts", method = "pearson", mode = "matrix", - p_adj_method = "fdr", - p_adj_threshold = 0.1, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.1, + cor.threshold = NULL, sort = TRUE, - filter_self_correlations = TRUE, + filter.self.cor = TRUE, verbose = 1)) expect_error(getCrossAssociation(mae, experiment1 = 3, experiment2 = 2, assay.type1 = "counts", assay.type2 = "counts", - colData_variable1 = FALSE, - colData_variable2 = NULL, + col.var1 = FALSE, + col.var2 = NULL, method = "spearman", mode = "table", - p_adj_method = "fdr", - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.05, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 3, experiment2 = 2, assay.type1 = "counts", assay.type2 = "counts", - colData_variable1 = NULL, - colData_variable2 = 1, + col.var1 = NULL, + col.var2 = 1, method = "spearman", mode = "table", - p_adj_method = "fdr", - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.05, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + 
filter.self.cor = FALSE, verbose = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 3, experiment2 = 2, assay.type1 = "counts", assay.type2 = "counts", - colData_variable1 = "test", - colData_variable2 = NULL, + col.var1 = "test", + col.var2 = NULL, method = "spearman", mode = "table", - p_adj_method = "fdr", - p_adj_threshold = 0.05, - cor_threshold = NULL, + p.adj.method = "fdr", + p.adj.threshold = 0.05, + cor.threshold = NULL, sort = FALSE, - filter_self_correlations = FALSE, + filter.self.cor = FALSE, verbose = TRUE)) ############################# Test input end ############################# # Test that association is calculated correctly with numeric data @@ -335,9 +335,9 @@ test_that("getCrossAssociation", { # Calculate correlation cor <- getCrossAssociation(mae, method = "pearson", - p_adj_threshold = NULL, - show_warnings = FALSE, - test_significance = TRUE) + p.adj.threshold = NULL, + show.warnings = FALSE, + test.signif = TRUE) # Take only specific taxa and lipids df <- cor[cor$Var1 %in% c("Fusobacteria", "Campylobacter", "Actinomycetaceae") & cor$Var2 %in% c("PE(48:7)", "TG(50:0)", "SM(d18:1/18:0)"), ] @@ -371,63 +371,63 @@ test_that("getCrossAssociation", { mae_sub <- mae[1:10, 1:10] # Test that output is in correct type expect_true( is.data.frame( - getCrossAssociation(mae_sub, p_adj_threshold = NULL, - show_warnings = FALSE, test_significance = TRUE)) ) + getCrossAssociation(mae_sub, p.adj.threshold = NULL, + show.warnings = FALSE, test.signif = TRUE)) ) expect_true( is.data.frame(getCrossAssociation(mae_sub, - show_warnings = FALSE)) ) + show.warnings = FALSE)) ) # There should not be any p-values that are under 0 expect_true( is.null( - getCrossAssociation(mae_sub, p_adj_threshold = 0, - show_warnings = FALSE, - test_significance = TRUE)) ) + getCrossAssociation(mae_sub, p.adj.threshold = 0, + show.warnings = FALSE, + test.signif = TRUE)) ) # Test that output is in correct type expect_true( is.list( getCrossAssociation(mae_sub, mode = 
"matrix", - p_adj_threshold = NULL, - show_warnings = FALSE, - test_significance = TRUE)) ) + p.adj.threshold = NULL, + show.warnings = FALSE, + test.signif = TRUE)) ) expect_true( is.matrix(getCrossAssociation(mae_sub, mode = "matrix", - show_warnings = FALSE)) ) + show.warnings = FALSE)) ) # There should not be any p-values that are under 0 expect_true( is.null( getCrossAssociation(mae_sub, - p_adj_threshold = 0, + p.adj.threshold = 0, mode = "matrix", - show_warnings = FALSE, - test_significance = TRUE)) ) + show.warnings = FALSE, + test.signif = TRUE)) ) # When correlation between same assay is calculated, calculation is made faster # by not calculating duplicates expect_error(getCrossAssociation(mae, experiment1 = 1, experiment2 = 1, - show_warnings = FALSE, + show.warnings = FALSE, symmetric = "TRUE", - test_significance = TRUE)) + test.signif = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, experiment2 = 1, - show_warnings = FALSE, + show.warnings = FALSE, symmetric = 1, - test_significance = TRUE)) + test.signif = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, experiment2 = 1, - show_warnings = FALSE, + show.warnings = FALSE, symmetric = NULL, - test_significance = TRUE)) + test.signif = TRUE)) expect_error(getCrossAssociation(mae, experiment1 = 1, experiment2 = 1, - show_warnings = FALSE, + show.warnings = FALSE, symmetric = c(TRUE, TRUE), - test_significance = TRUE)) + test.signif = TRUE)) time <- system.time( cor <- getCrossAssociation(mae, experiment1 = 1, experiment2 = 1, - show_warnings = FALSE, + show.warnings = FALSE, symmetric = TRUE, - test_significance = TRUE) + test.signif = TRUE) ) time2 <- system.time( cor2 <- getCrossAssociation(mae, experiment1 = 1, experiment2 = 1, - show_warnings = FALSE, - test_significance = TRUE) + show.warnings = FALSE, + test.signif = TRUE) ) # Get random variables and test that their duplicates are equal for(i in 1:10 ){ @@ -453,14 +453,14 @@ test_that("getCrossAssociation", { 
experiment2 = tse2, paired = TRUE, MARGIN = 2, - show_warnings = FALSE, - test_significance = TRUE) + show.warnings = FALSE, + test.signif = TRUE) # Calculate all pairs cor <- getCrossAssociation(tse1, experiment2 = tse2, MARGIN = 2, - show_warnings = FALSE, - test_significance = TRUE) + show.warnings = FALSE, + test.signif = TRUE) # Take only pairs that are paired cor <- cor[cor$Var1 == cor$Var2, ] rownames(cor) <- NULL @@ -472,52 +472,52 @@ test_that("getCrossAssociation", { # Test that result does not depend on names (if there are equal names) tse <- mae[[1]] rownames(tse)[1:10] <- rep("Unknown", 10) - cor_table <- getCrossAssociation(tse, show_warnings = FALSE, - test_significance = TRUE) - cor_table_ref <- getCrossAssociation(mae[[1]], show_warnings = FALSE, - test_significance = TRUE) + cor_table <- getCrossAssociation(tse, show.warnings = FALSE, + test.signif = TRUE) + cor_table_ref <- getCrossAssociation(mae[[1]], show.warnings = FALSE, + test.signif = TRUE) expect_equal(cor_table[ , 3:5], cor_table_ref[ , 3:5]) - mat <- getCrossAssociation(tse, mode = "matrix", show_warnings = FALSE) + mat <- getCrossAssociation(tse, mode = "matrix", show.warnings = FALSE) expect_true( is.matrix(mat) ) expect_true(nrow(mat) == nrow(tse) && ncol(mat) == nrow(tse)) - mat <- getCrossAssociation(tse, mode = "matrix", show_warnings = FALSE, - cor_threshold = 0.8, - filter_self_correlation = TRUE) + mat <- getCrossAssociation(tse, mode = "matrix", show.warnings = FALSE, + cor.threshold = 0.8, + filter.self.cor = TRUE) expect_true(nrow(mat) < nrow(tse) && ncol(mat) < nrow(tse)) # Test user's own function expect_true( is.data.frame(getCrossAssociation(tse, method = "canberra", mode = "table", - show_warnings = T, - association_FUN = stats::dist) ) ) + show.warnings = T, + association.fun = stats::dist) ) ) expect_true( is.matrix( getCrossAssociation(tse, method = "bray", - show_warnings = FALSE, + show.warnings = FALSE, mode = "matrix", - association_FUN = vegan::vegdist, - 
test_significance = TRUE) ) ) + association.fun = vegan::vegdist, + test.signif = TRUE) ) ) expect_error( getCrossAssociation(tse, method = "bray", - show_warnings = FALSE, + show.warnings = FALSE, mode = "matrix", - association_FUN = DelayedMatrixStats::rowSums2, - test_significance = TRUE) ) + association.fun = DelayedMatrixStats::rowSums2, + test.signif = TRUE) ) # Test that output has right columns expect_equal(colnames(getCrossAssociation(tse, - show_warnings = FALSE)), + show.warnings = FALSE)), c("Var1", "Var2", "cor")) - expect_equal(colnames(getCrossAssociation(tse, show_warnings = FALSE, - test_significance = TRUE)), + expect_equal(colnames(getCrossAssociation(tse, show.warnings = FALSE, + test.signif = TRUE)), c("Var1", "Var2", "cor", "pval", "p_adj")) # Test that the table have same information with different levels - tab1 <- getCrossAssociation(tse, show_warnings = FALSE) + tab1 <- getCrossAssociation(tse, show.warnings = FALSE) tab1_levels1 <- levels(tab1$Var1) tab1_levels2 <- levels(tab1$Var2) tab1$Var1 <- as.character(tab1$Var1) tab1$Var2 <- as.character(tab1$Var2) - tab2 <- getCrossAssociation(tse, show_warnings = FALSE, sort = TRUE) + tab2 <- getCrossAssociation(tse, show.warnings = FALSE, sort = TRUE) tab2_levels1 <- levels(tab2$Var1) tab2_levels2 <- levels(tab2$Var2) tab2$Var1 <- as.character(tab2$Var1) @@ -529,15 +529,15 @@ test_that("getCrossAssociation", { # Test altexps altExps(tse) <- splitByRanks(tse) # Test that output has right columns - expect_equal(getCrossAssociation(tse, tse, show_warnings = FALSE, + expect_equal(getCrossAssociation(tse, tse, show.warnings = FALSE, altexp1 = 1, altexp2 = "Phylum"), getCrossAssociation(altExps(tse)[[1]], altExp(tse, "Phylum"), - show_warnings = FALSE)) - expect_equal(getCrossAssociation(tse, tse, show_warnings = FALSE, + show.warnings = FALSE)) + expect_equal(getCrossAssociation(tse, tse, show.warnings = FALSE, altexp1 = "Family", altexp2 = NULL), getCrossAssociation(altExp(tse, "Family"), tse, - 
show_warnings = FALSE)) + show.warnings = FALSE)) # Test colData_variable # Check that all the correct names are included @@ -545,7 +545,7 @@ test_that("getCrossAssociation", { tse <- estimateDiversity(tse, index = indices) res <- getCrossAssociation(tse, tse, assay.type1 = "counts", - colData_variable2 = indices) + col.var2 = indices) unique_var1 <- unfactor(unique(res$Var1)) unique_var2 <- unfactor(unique(res$Var2)) rownames <- rownames(tse) @@ -555,12 +555,12 @@ test_that("getCrossAssociation", { # Check that assay.type is disabled res2 <- getCrossAssociation(tse, assay.type1 = "counts", assay.type2 = "counts", - colData_variable2 = indices) + col.var2 = indices) expect_equal(res, res2) colData(tse)[, "test"] <- rep("a") expect_error( - getCrossAssociation(tse, colData_variable2 = c("shannon", "test"))) + getCrossAssociation(tse, col.var2 = c("shannon", "test"))) } }) diff --git a/tests/testthat/test-5prevalence.R b/tests/testthat/test-5prevalence.R index f780bf147..3273023d8 100644 --- a/tests/testthat/test-5prevalence.R +++ b/tests/testthat/test-5prevalence.R @@ -5,40 +5,40 @@ test_that("getPrevalence", { data(GlobalPatterns, package="mia") expect_error(getPrevalence(GlobalPatterns, detection="test"), "'detection' must be a single numeric value or coercible to one") - expect_error(getPrevalence(GlobalPatterns, include_lowest="test"), - "'include_lowest' must be TRUE or FALSE") + expect_error(getPrevalence(GlobalPatterns, include.lowest="test"), + "'include.lowest' must be TRUE or FALSE") expect_error(getPrevalence(GlobalPatterns, sort="test"), "'sort' must be TRUE or FALSE") - expect_error(getPrevalence(GlobalPatterns, as_relative="test"), - "'as_relative' must be TRUE or FALSE") + expect_error(getPrevalence(GlobalPatterns, as.relative="test"), + "'as.relative' must be TRUE or FALSE") expect_error(getPrevalence(GlobalPatterns, assay.type="test"), "'assay.type' must be a valid name") # Output should be always a frequency between 0 to 1 - pr <- 
getPrevalence(GlobalPatterns, detection=0.1/100, as_relative=TRUE) + pr <- getPrevalence(GlobalPatterns, detection=0.1/100, as.relative=TRUE) expect_true(min(pr) >= 0 && max(pr) <= 1) - pr <- getPrevalence(GlobalPatterns, detection=0.1/100, as_relative=FALSE) + pr <- getPrevalence(GlobalPatterns, detection=0.1/100, as.relative=FALSE) expect_true(min(pr) >= 0 && max(pr) <= 1) - # Same prevalences should be returned for as_relative T/F in certain cases. - pr1 <- getPrevalence(GlobalPatterns, detection=1, include_lowest=TRUE, as_relative=FALSE) - pr2 <- getPrevalence(GlobalPatterns, detection=0/100, include_lowest=FALSE, as_relative=TRUE) + # Same prevalences should be returned for as.relative T/F in certain cases. + pr1 <- getPrevalence(GlobalPatterns, detection=1, include.lowest=TRUE, as.relative=FALSE) + pr2 <- getPrevalence(GlobalPatterns, detection=0/100, include.lowest=FALSE, as.relative=TRUE) expect_true(all(pr1 == pr2)) - # Same prevalences should be returned for as_relative T/F in certain cases. - pr1 <- getPrevalence(GlobalPatterns, detection=1, include_lowest=TRUE, as_relative=FALSE) - pr2 <- getPrevalence(GlobalPatterns, detection=0, include_lowest=FALSE, as_relative=FALSE) + # Same prevalences should be returned for as.relative T/F in certain cases. 
+ pr1 <- getPrevalence(GlobalPatterns, detection=1, include.lowest=TRUE, as.relative=FALSE) + pr2 <- getPrevalence(GlobalPatterns, detection=0, include.lowest=FALSE, as.relative=FALSE) expect_true(all(pr1 == pr2)) # Different ways to use relative abundance should yield the same output - pr2 <- getPrevalence(GlobalPatterns, as_relative=TRUE, assay.type = "counts") + pr2 <- getPrevalence(GlobalPatterns, as.relative=TRUE, assay.type = "counts") GlobalPatterns <- transformAssay(GlobalPatterns, method="relabundance") - pr1 <- getPrevalence(GlobalPatterns, as_relative=FALSE, assay.type = "relabundance") + pr1 <- getPrevalence(GlobalPatterns, as.relative=FALSE, assay.type = "relabundance") expect_true(all(pr1 == pr2)) # Sorting should put the top values first pr <- getPrevalence(GlobalPatterns, sort=TRUE, detection = 0.1/100) expect_equal(as.vector(which.max(pr)), 1) - pr <- names(head(getPrevalence(GlobalPatterns, sort=TRUE, include_lowest = TRUE), 5L)) + pr <- names(head(getPrevalence(GlobalPatterns, sort=TRUE, include.lowest = TRUE), 5L)) actual <- getTop(GlobalPatterns, method="prevalence", top=5, @@ -54,12 +54,12 @@ test_that("getPrevalence", { gp_null <- GlobalPatterns rownames(gp_null) <- NULL - pr1 <- unname(getPrevalence(GlobalPatterns, detection=0.004, as_relative=TRUE)) - pr2 <- getPrevalence(gp_null, detection=0.004, as_relative=TRUE) + pr1 <- unname(getPrevalence(GlobalPatterns, detection=0.004, as.relative=TRUE)) + pr2 <- getPrevalence(gp_null, detection=0.004, as.relative=TRUE) expect_equal(pr1, pr2) - pr1 <- getPrevalence(GlobalPatterns, detection=0.004, as_relative=TRUE, rank = "Family") - pr2 <- getPrevalence(gp_null, detection=0.004, as_relative=TRUE, rank = "Family") + pr1 <- getPrevalence(GlobalPatterns, detection=0.004, as.relative=TRUE, rank = "Family") + pr2 <- getPrevalence(gp_null, detection=0.004, as.relative=TRUE, rank = "Family") expect_equal(pr1, pr2) # Check that na.rm works correctly @@ -104,13 +104,13 @@ test_that("getPrevalence", { remove 
<- c(15, 200) assay(tse, "counts")[remove, ] <- NA # Check that agglomeration works - tse_agg <- agglomerateByRank(tse, onRankOnly = FALSE, na.rm = FALSE, rank = rank) + tse_agg <- agglomerateByRank(tse, ignore.taxonomy = FALSE, na.rm = FALSE, rank = rank) expect_warning(ref <- getPrevalence(tse_agg, na.rm = FALSE)) expect_warning(res <- getPrevalence(tse, rank = "Genus", agg.na.rm = FALSE)) expect_true( all(res == ref, na.rm = TRUE) ) # tse_agg <- agglomerateByRank( - tse, onRankOnly = FALSE, na.rm = TRUE, rank = rank) + tse, ignore.taxonomy = FALSE, na.rm = TRUE, rank = rank) ref <- getPrevalence(tse_agg, na.rm = TRUE) res <- getPrevalence( tse, na.rm = TRUE, rank = "Genus", agg.na.rm = TRUE) @@ -124,42 +124,42 @@ test_that("getPrevalent", { expect_error(getPrevalent(GlobalPatterns, prevalence="test"), "'prevalence' must be a single numeric value or coercible to one") # Results compatible with getPrevalence - pr1 <- getPrevalent(GlobalPatterns, detection=0.1/100, as_relative=TRUE, sort=TRUE) - pr2 <- names(getPrevalence(GlobalPatterns, rank = "Kingdom", detection=0.1/100, as_relative=TRUE, sort=TRUE)) + pr1 <- getPrevalent(GlobalPatterns, detection=0.1/100, as.relative=TRUE, sort=TRUE) + pr2 <- names(getPrevalence(GlobalPatterns, rank = "Kingdom", detection=0.1/100, as.relative=TRUE, sort=TRUE)) expect_true(all(pr1 == pr2)) # Same sorting for toptaxa obtained in different ways - pr1 <- getPrevalent(GlobalPatterns, detection=0.1/100, as_relative=TRUE, sort=TRUE) - pr2 <- names(getPrevalence(GlobalPatterns, rank = "Kingdom", detection=0.1/100, as_relative=TRUE, sort=TRUE)) + pr1 <- getPrevalent(GlobalPatterns, detection=0.1/100, as.relative=TRUE, sort=TRUE) + pr2 <- names(getPrevalence(GlobalPatterns, rank = "Kingdom", detection=0.1/100, as.relative=TRUE, sort=TRUE)) expect_true(all(pr1 == pr2)) # Retrieved taxa are the same for counts and relative abundances - pr1 <- getPrevalent(GlobalPatterns, prevalence=0.1/100, as_relative=TRUE) - pr2 <- 
getPrevalent(GlobalPatterns, prevalence=0.1/100, as_relative=FALSE) + pr1 <- getPrevalent(GlobalPatterns, prevalence=0.1/100, as.relative=TRUE) + pr2 <- getPrevalent(GlobalPatterns, prevalence=0.1/100, as.relative=FALSE) expect_true(all(pr1 == pr2)) # Prevalence and detection threshold at 0 has the same impact on counts and relative abundances - pr1 <- getPrevalent(GlobalPatterns, detection=0, prevalence=0, as_relative=TRUE) - pr2 <- getPrevalent(GlobalPatterns, detection=0, prevalence=0, as_relative=FALSE) + pr1 <- getPrevalent(GlobalPatterns, detection=0, prevalence=0, as.relative=TRUE) + pr2 <- getPrevalent(GlobalPatterns, detection=0, prevalence=0, as.relative=FALSE) expect_true(all(pr1 == pr2)) # Check that works also when rownames is NULL gp_null <- GlobalPatterns rownames(gp_null) <- NULL - pr1 <- getPrevalent(GlobalPatterns, detection=0.0045, prevalence = 0.25, as_relative=TRUE) + pr1 <- getPrevalent(GlobalPatterns, detection=0.0045, prevalence = 0.25, as.relative=TRUE) pr1 <- which(rownames(GlobalPatterns) %in% pr1) - pr2 <- getPrevalent(gp_null, detection=0.0045, prevalence = 0.25, as_relative=TRUE) + pr2 <- getPrevalent(gp_null, detection=0.0045, prevalence = 0.25, as.relative=TRUE) expect_equal(pr1, pr2) # Test alias - alias <- getPrevalent(gp_null, detection=0.0045, prevalence = 0.25, as_relative=TRUE) + alias <- getPrevalent(gp_null, detection=0.0045, prevalence = 0.25, as.relative=TRUE) expect_equal(pr1, alias) pr1 <- getPrevalent(GlobalPatterns, detection=0.004, prevalence = 0.1, - as_relative=TRUE, rank = "Family") + as.relative=TRUE, rank = "Family") pr2 <- getPrevalent(gp_null, detection=0.004, prevalence = 0.1, - as_relative=TRUE, rank = "Family") + as.relative=TRUE, rank = "Family") expect_equal(pr1, pr2) }) @@ -184,12 +184,12 @@ test_that("getRare", { prevalent_taxa <- getPrevalent(GlobalPatterns, detection = 0, prevalence = 90/100, - include_lowest = FALSE) + include.lowest = FALSE) # Gets rare taxa rare_taxa <- getRare(GlobalPatterns, 
detection = 0, prevalence = 90/100, - include_lowest = FALSE) + include.lowest = FALSE) # Concatenates prevalent and rare taxa prevalent_and_rare_taxa <- c(prevalent_taxa, rare_taxa) @@ -219,13 +219,13 @@ test_that("getRare", { prevalence = 0.05, detection = 0.1, rank = rank, - include_lowest = TRUE, as_relative = TRUE) + include.lowest = TRUE, as.relative = TRUE) # Gets rare taxa rare_taxa <- getRare(GlobalPatterns, prevalence = 0.05, detection = 0.1, rank = rank, - include_lowest = TRUE, as_relative = TRUE) + include.lowest = TRUE, as.relative = TRUE) # Concatenates prevalent and rare taxa prevalent_and_rare_taxa <- c(prevalent_taxa, rare_taxa) @@ -246,19 +246,19 @@ test_that("getRare", { gp_null <- GlobalPatterns rownames(gp_null) <- NULL - pr1 <- getRare(GlobalPatterns, detection=0.0045, prevalence = 0.25, as_relative=TRUE) + pr1 <- getRare(GlobalPatterns, detection=0.0045, prevalence = 0.25, as.relative=TRUE) pr1 <- which(rownames(GlobalPatterns) %in% pr1) - pr2 <- getRare(gp_null, detection=0.0045, prevalence = 0.25, as_relative=TRUE) + pr2 <- getRare(gp_null, detection=0.0045, prevalence = 0.25, as.relative=TRUE) expect_equal(pr1, pr2) # Test lias - alias <- getRare(gp_null, detection=0.0045, prevalence = 0.25, as_relative=TRUE) + alias <- getRare(gp_null, detection=0.0045, prevalence = 0.25, as.relative=TRUE) expect_equal(pr1, alias) pr1 <- getRare(GlobalPatterns, detection=0.004, prevalence = 0.1, - as_relative=TRUE, rank = "Family") + as.relative=TRUE, rank = "Family") pr2 <- getRare(gp_null, detection=0.004, prevalence = 0.1, - as_relative=TRUE, rank = "Family") + as.relative=TRUE, rank = "Family") expect_equal(pr1, pr2) }) @@ -273,19 +273,19 @@ test_that("subsetByPrevalent", { # Results compatible with getPrevalent pr1 <- rownames(subsetByPrevalent( GlobalPatterns, rank = "Class", detection=0.1/100, - as_relative=TRUE, sort=TRUE)) + as.relative=TRUE, sort=TRUE)) pr2 <- getPrevalent(GlobalPatterns, rank = "Class", detection=0.1/100, - as_relative=TRUE, 
sort=TRUE) + as.relative=TRUE, sort=TRUE) expect_true(all(pr1 == pr2)) # Retrieved taxa are the same for counts and relative abundances - pr1 <- assay(subsetByPrevalent(GlobalPatterns, prevalence=0.1/100, as_relative=TRUE), "counts") - pr2 <- assay(subsetByPrevalent(GlobalPatterns, prevalence=0.1/100, as_relative=FALSE), "counts") + pr1 <- assay(subsetByPrevalent(GlobalPatterns, prevalence=0.1/100, as.relative=TRUE), "counts") + pr2 <- assay(subsetByPrevalent(GlobalPatterns, prevalence=0.1/100, as.relative=FALSE), "counts") expect_true(all(pr1 == pr2)) # Prevalence and detection threshold at 0 has the same impact on counts and relative abundances - pr1 <- rownames(subsetByPrevalent(GlobalPatterns, detection=0, prevalence=0, as_relative=TRUE)) - pr2 <- rownames(subsetByPrevalent(GlobalPatterns, detection=0, prevalence=0, as_relative=FALSE)) + pr1 <- rownames(subsetByPrevalent(GlobalPatterns, detection=0, prevalence=0, as.relative=TRUE)) + pr2 <- rownames(subsetByPrevalent(GlobalPatterns, detection=0, prevalence=0, as.relative=FALSE)) expect_true(all(pr1 == pr2)) # Check that works also when rownames is NULL @@ -321,29 +321,29 @@ test_that("subsetByRare", { # Results compatible with getRare pr1 <- rownames(subsetByRare( GlobalPatterns, rank = "Phylum", detection=0.1/100, - as_relative=TRUE, sort=TRUE)) + as.relative=TRUE, sort=TRUE)) pr2 <- getRare(GlobalPatterns, rank = "Phylum", detection=0.1/100, - as_relative=TRUE, sort=TRUE) + as.relative=TRUE, sort=TRUE) expect_true(all(pr1 == pr2)) # Retrieved taxa are the same for counts and relative abundances - pr1 <- assay(subsetByRare(GlobalPatterns, prevalence=0.1/100, as_relative=TRUE), "counts") - pr2 <- assay(subsetByRare(GlobalPatterns, prevalence=0.1/100, as_relative=FALSE), "counts") + pr1 <- assay(subsetByRare(GlobalPatterns, prevalence=0.1/100, as.relative=TRUE), "counts") + pr2 <- assay(subsetByRare(GlobalPatterns, prevalence=0.1/100, as.relative=FALSE), "counts") expect_true(all(pr1 == pr2)) # Prevalence and 
detection threshold at 0 has the same impact on counts and relative abundances - pr1 <- rownames(subsetByRare(GlobalPatterns, detection=0, prevalence=0, as_relative=TRUE)) - pr2 <- rownames(subsetByRare(GlobalPatterns, detection=0, prevalence=0, as_relative=FALSE)) + pr1 <- rownames(subsetByRare(GlobalPatterns, detection=0, prevalence=0, as.relative=TRUE)) + pr2 <- rownames(subsetByRare(GlobalPatterns, detection=0, prevalence=0, as.relative=FALSE)) expect_true(all(pr1 == pr2)) # subsetByRare + subsetByPrevalent should include all the taxa in OTU level d <- runif(1, 0.0001, 0.1) p <- runif(1, 0.0001, 0.5) rare <- rownames(subsetByRare(GlobalPatterns, detection=d, prevalence=p, - as_relative=TRUE)) + as.relative=TRUE)) prevalent <- rownames(subsetByPrevalent(GlobalPatterns, detection=d, prevalence=p, - as_relative=TRUE)) + as.relative=TRUE)) all_taxa <- c(rare, prevalent) @@ -375,8 +375,8 @@ test_that("subsetByRare", { test_that("agglomerateByPrevalence", { data(GlobalPatterns, package="mia") - expect_error(agglomerateByPrevalence(GlobalPatterns, other_label=TRUE), - "'other_label' must be a single character value") + expect_error(agglomerateByPrevalence(GlobalPatterns, other.label=TRUE), + "'other.label' must be a single character value") actual <- agglomerateByPrevalence(GlobalPatterns, rank = "Kingdom") expect_s4_class(actual,class(GlobalPatterns)) expect_equal(dim(actual),c(2,26)) @@ -385,8 +385,8 @@ test_that("agglomerateByPrevalence", { rank = "Phylum", detection = 1/100, prevalence = 50/100, - as_relative = TRUE, - other_label = "test") + as.relative = TRUE, + other.label = "test") expect_s4_class(actual,class(GlobalPatterns)) expect_equal(dim(actual),c(6,26)) expect_equal(rowData(actual)[6,"Phylum"],"test") @@ -395,20 +395,20 @@ test_that("agglomerateByPrevalence", { rank = NULL, detection = 0.0001, prevalence = 50/100, - as_relative = TRUE, - other_label = "test") + as.relative = TRUE, + other.label = "test") 
expect_equal(agglomerateByPrevalence(GlobalPatterns, rank = NULL, detection = 0.0001, prevalence = 50/100, - as_relative = TRUE, - other_label = "test"), + as.relative = TRUE, + other.label = "test"), agglomerateByPrevalence(GlobalPatterns, rank = NULL, detection = 0.0001, prevalence = 50/100, - as_relative = TRUE, - other_label = "test")) + as.relative = TRUE, + other.label = "test")) expect_s4_class(actual,class(GlobalPatterns)) expect_equal(dim(actual),c(6,26)) expect_true(all(is.na(rowData(actual)[6,]))) diff --git a/tests/testthat/test-5runDPCoA.R b/tests/testthat/test-5runDPCoA.R index d5146d3fa..95848ca3e 100644 --- a/tests/testthat/test-5runDPCoA.R +++ b/tests/testthat/test-5runDPCoA.R @@ -15,53 +15,53 @@ test_that("addDPCoA", { # ERRORs expect_error( - addDPCoA(esophagus, assay.type = "test", tree_name = "phylo", ncomponents = 2, ntop = NULL, - subset_row = NULL, scale = FALSE, transposed = FALSE) + addDPCoA(esophagus, assay.type = "test", tree.name = "phylo", ncomponents = 2, ntop = NULL, + subset.row = NULL, scale = FALSE, transposed = FALSE) ) expect_error( - addDPCoA(esophagus, assay.type = 1, tree_name = "phylo", ncomponents = 2, ntop = NULL, - subset_row = NULL, scale = FALSE, transposed = FALSE) + addDPCoA(esophagus, assay.type = 1, tree.name = "phylo", ncomponents = 2, ntop = NULL, + subset.row = NULL, scale = FALSE, transposed = FALSE) ) expect_error( - addDPCoA(esophagus, assay.type = TRUE, tree_name = "phylo", ncomponents = 2, ntop = NULL, - subset_row = NULL, scale = FALSE, transposed = FALSE) + addDPCoA(esophagus, assay.type = TRUE, tree.name = "phylo", ncomponents = 2, ntop = NULL, + subset.row = NULL, scale = FALSE, transposed = FALSE) ) expect_error( - addDPCoA(esophagus, assay.type = "counts", tree_name = "test", ncomponents = 2, ntop = NULL, - subset_row = NULL, scale = FALSE, transposed = FALSE) + addDPCoA(esophagus, assay.type = "counts", tree.name = "test", ncomponents = 2, ntop = NULL, + subset.row = NULL, scale = FALSE, transposed = 
FALSE) ) expect_error( - addDPCoA(esophagus, assay.type = "counts", tree_name = 1, ncomponents = 2, ntop = NULL, - subset_row = NULL, scale = FALSE, transposed = FALSE) + addDPCoA(esophagus, assay.type = "counts", tree.name = 1, ncomponents = 2, ntop = NULL, + subset.row = NULL, scale = FALSE, transposed = FALSE) ) expect_error( - addDPCoA(esophagus, assay.type = "counts", tree_name = "phylo", ncomponents = TRUE, ntop = NULL, - subset_row = NULL, scale = FALSE, transposed = FALSE) + addDPCoA(esophagus, assay.type = "counts", tree.name = "phylo", ncomponents = TRUE, ntop = NULL, + subset.row = NULL, scale = FALSE, transposed = FALSE) ) expect_error( - addDPCoA(esophagus, assay.type = "counts", tree_name = "phylo", ncomponents = "test", ntop = NULL, - subset_row = NULL, scale = FALSE, transposed = FALSE) + addDPCoA(esophagus, assay.type = "counts", tree.name = "phylo", ncomponents = "test", ntop = NULL, + subset.row = NULL, scale = FALSE, transposed = FALSE) ) expect_error( - addDPCoA(esophagus, assay.type = "counts", tree_name = "phylo", ncomponents = "test", ntop = "test", - subset_row = NULL, scale = FALSE, transposed = FALSE) + addDPCoA(esophagus, assay.type = "counts", tree.name = "phylo", ncomponents = "test", ntop = "test", + subset.row = NULL, scale = FALSE, transposed = FALSE) ) expect_error( - addDPCoA(esophagus, assay.type = "counts", tree_name = "phylo", ncomponents = 1.3, ntop = "test", - subset_row = NULL, scale = FALSE, transposed = FALSE) + addDPCoA(esophagus, assay.type = "counts", tree.name = "phylo", ncomponents = 1.3, ntop = "test", + subset.row = NULL, scale = FALSE, transposed = FALSE) ) expect_error( addDPCoA(esophagus, name = c("test", "test2"), assay.type = "counts", - tree_name = "phylo", ncomponents = 1.3, ntop = "test", - subset_row = NULL, scale = FALSE, transposed = FALSE) + tree.name = "phylo", ncomponents = 1.3, ntop = "test", + subset.row = NULL, scale = FALSE, transposed = FALSE) ) expect_error( - addDPCoA(esophagus, name = 1, 
assay.type = "counts", tree_name = "phylo", ncomponents = 1.3, ntop = "test", - subset_row = NULL, scale = FALSE, transposed = FALSE) + addDPCoA(esophagus, name = 1, assay.type = "counts", tree.name = "phylo", ncomponents = 1.3, ntop = "test", + subset.row = NULL, scale = FALSE, transposed = FALSE) ) data(GlobalPatterns, package="mia") tse <- mergeSEs(esophagus, GlobalPatterns) # expect_warning(addDPCoA(tse)) - # expect_warning(addDPCoA(tse, tree_name = "phylo.1")) + # expect_warning(addDPCoA(tse, tree.name = "phylo.1")) }) diff --git a/tests/testthat/test-5runNMDS.R b/tests/testthat/test-5runNMDS.R index d413da167..83092edff 100644 --- a/tests/testthat/test-5runNMDS.R +++ b/tests/testthat/test-5runNMDS.R @@ -8,7 +8,7 @@ test_that("addNMDS", { actual <- getNMDS(se) expect_true(is.matrix(actual)) expect_equal(dim(actual),c(10,2)) - actual2 <- getNMDS(se,nmds="monoMDS",pc=FALSE,scaling=FALSE) + actual2 <- getNMDS(se,nmds.fun="monoMDS",pc=FALSE,scaling=FALSE) expect_true(is.matrix(actual)) expect_equal(dim(actual),c(10,2)) expect_true(sum(actual2 - actual) < 0.00001) diff --git a/tests/testthat/test-8subsample.R b/tests/testthat/test-8subsample.R index a2305ce1b..325084dbc 100644 --- a/tests/testthat/test-8subsample.R +++ b/tests/testthat/test-8subsample.R @@ -5,7 +5,7 @@ test_that("rarefyAssay", { data(GlobalPatterns, package="mia") expect_warning(tse.subsampled <- rarefyAssay(GlobalPatterns, - min_size = 60000, + sample = 60000, name = "subsampled", replace = TRUE)) # check class @@ -38,7 +38,7 @@ test_that("rarefyAssay", { seed = 1938 set.seed(seed) expect_warning(tse.subsampled.rp <- rarefyAssay(GlobalPatterns, - min_size = 60000, + sample = 60000, name = "subsampled", replace = FALSE)) diff --git a/vignettes/mia.Rmd b/vignettes/mia.Rmd index bfe586d04..687f0794d 100644 --- a/vignettes/mia.Rmd +++ b/vignettes/mia.Rmd @@ -168,7 +168,7 @@ taxa <- rowData(altExp(tse,"Species"))[,taxonomyRanks(tse)] taxa_res <- resolveLoop(as.data.frame(taxa)) taxa_tree <- toTree(data 
= taxa_res) taxa_tree$tip.label <- getTaxonomyLabels(altExp(tse,"Species")) -rowNodeLab <- getTaxonomyLabels(altExp(tse,"Species"), make_unique = FALSE) +rowNodeLab <- getTaxonomyLabels(altExp(tse,"Species"), make.unique = FALSE) altExp(tse,"Species") <- changeTree(altExp(tse,"Species"), rowTree = taxa_tree, rowNodeLab = rowNodeLab) @@ -194,7 +194,7 @@ Sub-sampling to equal number of counts per sample. Also known as rarefying. data(GlobalPatterns, package = "mia") tse.subsampled <- rarefyAssay(GlobalPatterns, - min_size = 60000, + sample = 60000, name = "subsampled", replace = TRUE, seed = 1938) @@ -320,8 +320,8 @@ To generate tidy data as used and required in most of the tidyverse, ```{r} molten_data <- meltAssay(tse, assay.type = "counts", - add_row_data = TRUE, - add_col_data = TRUE + add.row = TRUE, + add.col = TRUE ) molten_data ```