From 1b9ed9ffa7fed137bbeb562110185b73f0056978 Mon Sep 17 00:00:00 2001 From: Muluh <127390183+Daenarys8@users.noreply.github.com> Date: Wed, 12 Jun 2024 09:33:18 +0100 Subject: [PATCH] remove only empty rank cols (#572) Signed-off-by: Daena Rys Co-authored-by: Tuomas Borman <60338854+TuomasBorman@users.noreply.github.com> Co-authored-by: TuomasBorman --- DESCRIPTION | 2 +- R/agglomerate.R | 24 +++++++++------- tests/testthat/test-3agglomerate.R | 45 ++++++++++++++++++++---------- 3 files changed, 45 insertions(+), 26 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 35a4c6914..982ec444c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mia Type: Package -Version: 1.13.21 +Version: 1.13.22 Authors@R: c(person(given = "Felix G.M.", family = "Ernst", role = c("aut"), email = "felix.gm.ernst@outlook.com", diff --git a/R/agglomerate.R b/R/agglomerate.R index 1f3a2987e..81cef8fc8 100644 --- a/R/agglomerate.R +++ b/R/agglomerate.R @@ -390,8 +390,8 @@ setMethod( x } -# This function removes empty columns from rowdata. (Those that include only -# NA values) +# This function removes empty rank columns from rowdata. (Those that include +# only NA values) .remove_NA_cols_from_rowdata <- function(x, remove_empty_ranks = FALSE, ...){ # Check remove_empty_ranks if( !.is_a_bool(remove_empty_ranks) ){ @@ -400,14 +400,18 @@ setMethod( } # If user wants to remove those columns if( remove_empty_ranks ){ - # Get rowData - rd <- rowData(x) - # Does teh column include data? - columns_including_data <- apply(rd, 2, function(x){!all(is.na(x))}) - # Subset data so that it includes only columns that include data - rd <- rd[, columns_including_data] - # Assign it back to SE - rowData(x) <- rd + # Get columns that include taxonomy information + rank_cols <- taxonomyRanks(x) + # Get rowData with only taxonomy + rd <- rowData(x)[ , rank_cols, drop = FALSE] + # Remove taxonomy from rowData + rowData(x) <- rowData(x)[ + , !colnames(rowData(x)) %in% rank_cols, drop = FALSE] + # Subset data so that it includes only rank columns that include data + non_empty_ranks <- apply(rd, 2, function(x) !all(is.na(x))) + rd <- rd[ , non_empty_ranks, drop = FALSE] + # Adding taxonomy back to SE + rowData(x) <- cbind(rowData(x), rd) } return(x) } diff --git a/tests/testthat/test-3agglomerate.R b/tests/testthat/test-3agglomerate.R index e1f1e8d82..01913e75f 100644 --- a/tests/testthat/test-3agglomerate.R +++ b/tests/testthat/test-3agglomerate.R @@ -121,6 +121,36 @@ test_that("agglomerate", { expect_equal(nrow(test0), 945) expect_equal(nrow(test1), 2307) + # Test that remove_empty_ranks work + expect_error( + agglomerateByRank(tse, rank = "Class", remove_empty_ranks = NULL)) + expect_error( + agglomerateByRank(tse, rank = "Class", remove_empty_ranks = "NULL")) + expect_error( + agglomerateByRank(tse, rank = "Class", remove_empty_ranks = 1)) + expect_error( + agglomerateByRank( + tse, rank = "Class", remove_empty_ranks = c(TRUE, TRUE))) + + # Add a column to rowData(se) to test that only NA rank columns are removed + # when remove_empty_ranks = TRUE + rank <- "Class" + rowData(tse)[["test"]] <- rep(NA, nrow(rowData(tse))) + x <- agglomerateByRank(tse, rank = rank) + rd1 <- rowData(x) + x <- agglomerateByRank(tse, rank = rank, remove_empty_ranks = TRUE) + rd2 <- rowData(x) + cols <- taxonomyRanks(tse)[ seq_len(which(taxonomyRanks(tse) == "Class")) ] + cols <- c(cols, "test") + expect_equal(rd1[, cols], rd2[, cols]) + expect_true( ncol(rd1) > ncol(rd2) ) + # Test that make_unique work + uniq <- agglomerateByRank(tse, rank = "Species", na.rm = FALSE) + not_uniq <- agglomerateByRank( + tse, rank = "Species", make_unique = FALSE, na.rm = FALSE) + expect_true( !any( duplicated(rownames(uniq)) ) ) + expect_true( any( duplicated(rownames(not_uniq)) ) ) + # Load data from miaTime package skip_if_not(require("miaTime", quietly = TRUE)) data(SilvermanAGutData) @@ -165,19 +195,4 @@ test_that("agglomerate", { "NYTDRRKDVHNKNDRVGRNDRSBRRAWTBYNHRKKKWRSSRKKRAAWKSSKWR", "RWDWTNDBRVRRAMHHCMRDKKSSRARGSSVSYYHNYBRRVHNDNNHYKRMVV", "YKVRDNNNSRAARSBDKGGKK")) - # Test that remove_empty_ranks work - expect_error(agglomerateByRank(se, rank = "Class", remove_empty_ranks = NULL)) - expect_error(agglomerateByRank(se, rank = "Class", remove_empty_ranks = "NULL")) - expect_error(agglomerateByRank(se, rank = "Class", remove_empty_ranks = 1)) - expect_error(agglomerateByRank(se, rank = "Class", remove_empty_ranks = c(TRUE, TRUE))) - x <- agglomerateByRank(se, rank = "Class") - rd1 <- rowData(x)[, 1:3] - x <- agglomerateByRank(se, rank = "Class", remove_empty_ranks = TRUE) - rd2 <- rowData(x) - expect_equal(rd1, rd2) - # Test that make_unique work - uniq <- agglomerateByRank(se, rank = "Species") - not_uniq <- agglomerateByRank(se, rank = "Species", make_unique = FALSE) - expect_true( !any( duplicated(rownames(uniq)) ) ) - expect_true( any( duplicated(rownames(not_uniq)) ) ) })