From 1b9ed9ffa7fed137bbeb562110185b73f0056978 Mon Sep 17 00:00:00 2001
From: Muluh <127390183+Daenarys8@users.noreply.github.com>
Date: Wed, 12 Jun 2024 09:33:18 +0100
Subject: [PATCH] remove only empty rank cols (#572)

Signed-off-by: Daena Rys <rysdaena8@gmail.com>
Co-authored-by: Tuomas Borman <60338854+TuomasBorman@users.noreply.github.com>
Co-authored-by: TuomasBorman <tvborm@utu.fi>
---
 DESCRIPTION                        |  2 +-
 R/agglomerate.R                    | 24 +++++++++-------
 tests/testthat/test-3agglomerate.R | 45 ++++++++++++++++++++----------
 3 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 35a4c6914..982ec444c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mia
 Type: Package
-Version: 1.13.21
+Version: 1.13.22
 Authors@R:
     c(person(given = "Felix G.M.", family = "Ernst", role = c("aut"),
              email = "felix.gm.ernst@outlook.com",
diff --git a/R/agglomerate.R b/R/agglomerate.R
index 1f3a2987e..81cef8fc8 100644
--- a/R/agglomerate.R
+++ b/R/agglomerate.R
@@ -390,8 +390,8 @@ setMethod(
         x
     }
 
-# This function removes empty columns from rowdata. (Those that include only
-# NA values)
+# This function removes empty rank columns from rowdata. (Those that include
+# only NA values)
 .remove_NA_cols_from_rowdata <- function(x, remove_empty_ranks = FALSE, ...){
     # Check remove_empty_ranks
     if( !.is_a_bool(remove_empty_ranks) ){
@@ -400,14 +400,18 @@ setMethod(
     }
     # If user wants to remove those columns
     if( remove_empty_ranks ){
-        # Get rowData
-        rd <- rowData(x)
-        # Does teh column include data?
-        columns_including_data <- apply(rd, 2, function(x){!all(is.na(x))})
-        # Subset data so that it includes only columns that include data
-        rd <- rd[, columns_including_data]
-        # Assign it back to SE
-        rowData(x) <- rd
+        # Get columns that include taxonomy information
+        rank_cols <- taxonomyRanks(x)
+        # Get rowData with only taxonomy
+        rd <- rowData(x)[ , rank_cols, drop = FALSE]
+        # Remove taxonomy from rowData
+        rowData(x) <- rowData(x)[
+            , !colnames(rowData(x)) %in% rank_cols, drop = FALSE]
+        # Subset data so that it includes only rank columns that include data
+        non_empty_ranks <- apply(rd, 2, function(x) !all(is.na(x)))
+        rd <- rd[ , non_empty_ranks, drop = FALSE]
+        # Adding taxonomy back to SE
+        rowData(x) <- cbind(rowData(x), rd)
     }
     return(x)
 }
diff --git a/tests/testthat/test-3agglomerate.R b/tests/testthat/test-3agglomerate.R
index e1f1e8d82..01913e75f 100644
--- a/tests/testthat/test-3agglomerate.R
+++ b/tests/testthat/test-3agglomerate.R
@@ -121,6 +121,36 @@ test_that("agglomerate", {
     expect_equal(nrow(test0), 945)
     expect_equal(nrow(test1), 2307)
     
+    # Test that remove_empty_ranks work
+    expect_error(
+        agglomerateByRank(tse, rank = "Class", remove_empty_ranks = NULL))
+    expect_error(
+        agglomerateByRank(tse, rank = "Class", remove_empty_ranks = "NULL"))
+    expect_error(
+        agglomerateByRank(tse, rank = "Class", remove_empty_ranks = 1))
+    expect_error(
+        agglomerateByRank(
+            tse, rank = "Class", remove_empty_ranks = c(TRUE, TRUE)))
+    
+    # Add a column to rowData(se) to test that only NA rank columns are removed
+    # when remove_empty_ranks = TRUE
+    rank <- "Class"
+    rowData(tse)[["test"]] <- rep(NA, nrow(rowData(tse)))
+    x <- agglomerateByRank(tse, rank = rank)
+    rd1 <- rowData(x)
+    x <- agglomerateByRank(tse, rank = rank, remove_empty_ranks = TRUE)
+    rd2 <- rowData(x)
+    cols <- taxonomyRanks(tse)[ seq_len(which(taxonomyRanks(tse) == "Class")) ]
+    cols <- c(cols, "test")
+    expect_equal(rd1[, cols], rd2[, cols])
+    expect_true( ncol(rd1) > ncol(rd2) )
+    # Test that make_unique work
+    uniq <- agglomerateByRank(tse, rank = "Species", na.rm = FALSE)
+    not_uniq <- agglomerateByRank(
+        tse, rank = "Species", make_unique = FALSE, na.rm = FALSE)
+    expect_true( !any( duplicated(rownames(uniq)) ) )
+    expect_true( any( duplicated(rownames(not_uniq)) ) )
+    
     # Load data from miaTime package
     skip_if_not(require("miaTime", quietly = TRUE))
     data(SilvermanAGutData)
@@ -165,19 +195,4 @@ test_that("agglomerate", {
                         "NYTDRRKDVHNKNDRVGRNDRSBRRAWTBYNHRKKKWRSSRKKRAAWKSSKWR",
                         "RWDWTNDBRVRRAMHHCMRDKKSSRARGSSVSYYHNYBRRVHNDNNHYKRMVV",
                         "YKVRDNNNSRAARSBDKGGKK"))
-    # Test that remove_empty_ranks work
-    expect_error(agglomerateByRank(se, rank = "Class", remove_empty_ranks = NULL))
-    expect_error(agglomerateByRank(se, rank = "Class", remove_empty_ranks = "NULL"))
-    expect_error(agglomerateByRank(se, rank = "Class", remove_empty_ranks = 1))
-    expect_error(agglomerateByRank(se, rank = "Class", remove_empty_ranks = c(TRUE, TRUE)))
-    x <- agglomerateByRank(se, rank = "Class")
-    rd1 <- rowData(x)[, 1:3]
-    x <- agglomerateByRank(se, rank = "Class", remove_empty_ranks = TRUE)
-    rd2 <- rowData(x)
-    expect_equal(rd1, rd2)
-    # Test that make_unique work
-    uniq <- agglomerateByRank(se, rank = "Species")
-    not_uniq <- agglomerateByRank(se, rank = "Species", make_unique = FALSE)
-    expect_true( !any( duplicated(rownames(uniq)) ) )
-    expect_true( any( duplicated(rownames(not_uniq)) ) )
 })